Streamlit_OCR_comparator

Sleeping

App Files Community

Loren committed on Nov 17, 2022

Commit

9c06ac7

•

1 Parent(s): 9c9c828

Initial commit

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

Home.py +19 -0
README.md +6 -7
configs/_base_/default_runtime.py +17 -0
configs/_base_/det_datasets/ctw1500.py +18 -0
configs/_base_/det_datasets/icdar2015.py +18 -0
configs/_base_/det_datasets/icdar2017.py +18 -0
configs/_base_/det_datasets/synthtext.py +18 -0
configs/_base_/det_datasets/toy_data.py +41 -0
configs/_base_/det_models/dbnet_r18_fpnc.py +21 -0
configs/_base_/det_models/dbnet_r50dcnv2_fpnc.py +23 -0
configs/_base_/det_models/dbnetpp_r50dcnv2_fpnc.py +28 -0
configs/_base_/det_models/drrg_r50_fpn_unet.py +21 -0
configs/_base_/det_models/fcenet_r50_fpn.py +33 -0
configs/_base_/det_models/fcenet_r50dcnv2_fpn.py +35 -0
configs/_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem.py +126 -0
configs/_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem_poly.py +126 -0
configs/_base_/det_models/panet_r18_fpem_ffm.py +43 -0
configs/_base_/det_models/panet_r50_fpem_ffm.py +21 -0
configs/_base_/det_models/psenet_r50_fpnf.py +51 -0
configs/_base_/det_models/textsnake_r50_fpn_unet.py +22 -0
configs/_base_/det_pipelines/dbnet_pipeline.py +88 -0
configs/_base_/det_pipelines/drrg_pipeline.py +60 -0
configs/_base_/det_pipelines/fcenet_pipeline.py +118 -0
configs/_base_/det_pipelines/maskrcnn_pipeline.py +57 -0
configs/_base_/det_pipelines/panet_pipeline.py +156 -0
configs/_base_/det_pipelines/psenet_pipeline.py +70 -0
configs/_base_/det_pipelines/textsnake_pipeline.py +65 -0
configs/_base_/recog_datasets/MJ_train.py +21 -0
configs/_base_/recog_datasets/ST_MJ_alphanumeric_train.py +31 -0
configs/_base_/recog_datasets/ST_MJ_train.py +29 -0
configs/_base_/recog_datasets/ST_SA_MJ_real_train.py +81 -0
configs/_base_/recog_datasets/ST_SA_MJ_train.py +48 -0
configs/_base_/recog_datasets/ST_charbox_train.py +23 -0
configs/_base_/recog_datasets/academic_test.py +57 -0
configs/_base_/recog_datasets/seg_toy_data.py +34 -0
configs/_base_/recog_datasets/toy_data.py +54 -0
configs/_base_/recog_models/abinet.py +70 -0
configs/_base_/recog_models/crnn.py +12 -0
configs/_base_/recog_models/crnn_tps.py +18 -0
configs/_base_/recog_models/master.py +61 -0
configs/_base_/recog_models/nrtr_modality_transform.py +11 -0
configs/_base_/recog_models/robust_scanner.py +24 -0
configs/_base_/recog_models/sar.py +24 -0
configs/_base_/recog_models/satrn.py +11 -0
configs/_base_/recog_models/seg.py +21 -0
configs/_base_/recog_pipelines/abinet_pipeline.py +96 -0
configs/_base_/recog_pipelines/crnn_pipeline.py +35 -0
configs/_base_/recog_pipelines/crnn_tps_pipeline.py +37 -0
configs/_base_/recog_pipelines/master_pipeline.py +42 -0
configs/_base_/recog_pipelines/nrtr_pipeline.py +38 -0

Home.py ADDED Viewed

	@@ -0,0 +1,19 @@

+import streamlit as st
+st.set_page_config(page_title='OCR Comparator', layout ="wide")
+st.image('ocr.png')
+st.write("")
+st.markdown('''#### OCR, or Optical Character Recognition, is a computer vision task, \
+which includes the detection of text areas, and the recognition of characters.''')
+st.write("")
+st.write("")
+st.markdown("#####  This app allows you to compare, from a given image, the results of different solutions:")
+st.markdown("##### *EasyOcr, PaddleOCR, MMOCR, Tesseract*")
+st.write("")
+st.write("")
+st.markdown("👈 Select the **About** page from the sidebar for information on how the app works")
+st.markdown("👈 or directly select the **App** page")

README.md CHANGED Viewed

@@ -1,12 +1,11 @@
 ---
 title: Streamlit OCR Comparator
-emoji: 🐨
 colorFrom: indigo
-colorTo: pink
 sdk: streamlit
 sdk_version: 1.10.0
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Streamlit OCR Comparator
+emoji: 📰🔍🔤
 colorFrom: indigo
+colorTo: gray
 sdk: streamlit
 sdk_version: 1.10.0
+app_file: Home.py
+tags: [streamlit, ocr]
+pinned: true
+---

configs/_base_/default_runtime.py ADDED Viewed

	@@ -0,0 +1,17 @@

+# yapf:disable
+log_config = dict(
+    interval=5,
+    hooks=[
+        dict(type='TextLoggerHook')
+    ])
+# yapf:enable
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+# disable opencv multithreading to avoid system being overloaded
+opencv_num_threads = 0
+# set multi-process start method as `fork` to speed up the training
+mp_start_method = 'fork'

configs/_base_/det_datasets/ctw1500.py ADDED Viewed

	@@ -0,0 +1,18 @@

+dataset_type = 'IcdarDataset'
+data_root = 'data/ctw1500'
+train = dict(
+    type=dataset_type,
+    ann_file=f'{data_root}/instances_training.json',
+    img_prefix=f'{data_root}/imgs',
+    pipeline=None)
+test = dict(
+    type=dataset_type,
+    ann_file=f'{data_root}/instances_test.json',
+    img_prefix=f'{data_root}/imgs',
+    pipeline=None)
+train_list = [train]
+test_list = [test]

configs/_base_/det_datasets/icdar2015.py ADDED Viewed

	@@ -0,0 +1,18 @@

+dataset_type = 'IcdarDataset'
+data_root = 'data/icdar2015'
+train = dict(
+    type=dataset_type,
+    ann_file=f'{data_root}/instances_training.json',
+    img_prefix=f'{data_root}/imgs',
+    pipeline=None)
+test = dict(
+    type=dataset_type,
+    ann_file=f'{data_root}/instances_test.json',
+    img_prefix=f'{data_root}/imgs',
+    pipeline=None)
+train_list = [train]
+test_list = [test]

configs/_base_/det_datasets/icdar2017.py ADDED Viewed

	@@ -0,0 +1,18 @@

+dataset_type = 'IcdarDataset'
+data_root = 'data/icdar2017'
+train = dict(
+    type=dataset_type,
+    ann_file=f'{data_root}/instances_training.json',
+    img_prefix=f'{data_root}/imgs',
+    pipeline=None)
+test = dict(
+    type=dataset_type,
+    ann_file=f'{data_root}/instances_val.json',
+    img_prefix=f'{data_root}/imgs',
+    pipeline=None)
+train_list = [train]
+test_list = [test]

configs/_base_/det_datasets/synthtext.py ADDED Viewed

	@@ -0,0 +1,18 @@

+dataset_type = 'TextDetDataset'
+data_root = 'data/synthtext'
+train = dict(
+    type=dataset_type,
+    ann_file=f'{data_root}/instances_training.lmdb',
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=1,
+        file_format='lmdb',
+        parser=dict(
+            type='LineJsonParser',
+            keys=['file_name', 'height', 'width', 'annotations'])),
+    img_prefix=f'{data_root}/imgs',
+    pipeline=None)
+train_list = [train]
+test_list = [train]

configs/_base_/det_datasets/toy_data.py ADDED Viewed

	@@ -0,0 +1,41 @@

+root = 'tests/data/toy_dataset'
+# dataset with type='TextDetDataset'
+train1 = dict(
+    type='TextDetDataset',
+    img_prefix=f'{root}/imgs',
+    ann_file=f'{root}/instances_test.txt',
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=4,
+        file_format='txt',
+        parser=dict(
+            type='LineJsonParser',
+            keys=['file_name', 'height', 'width', 'annotations'])),
+    pipeline=None,
+    test_mode=False)
+# dataset with type='IcdarDataset'
+train2 = dict(
+    type='IcdarDataset',
+    ann_file=f'{root}/instances_test.json',
+    img_prefix=f'{root}/imgs',
+    pipeline=None)
+test = dict(
+    type='TextDetDataset',
+    img_prefix=f'{root}/imgs',
+    ann_file=f'{root}/instances_test.txt',
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=1,
+        file_format='txt',
+        parser=dict(
+            type='LineJsonParser',
+            keys=['file_name', 'height', 'width', 'annotations'])),
+    pipeline=None,
+    test_mode=True)
+train_list = [train1, train2]
+test_list = [test]

configs/_base_/det_models/dbnet_r18_fpnc.py ADDED Viewed

	@@ -0,0 +1,21 @@

+model = dict(
+    type='DBNet',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=18,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
+        norm_eval=False,
+        style='caffe'),
+    neck=dict(
+        type='FPNC', in_channels=[64, 128, 256, 512], lateral_channels=256),
+    bbox_head=dict(
+        type='DBHead',
+        in_channels=256,
+        loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True),
+        postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')),
+    train_cfg=None,
+    test_cfg=None)

configs/_base_/det_models/dbnet_r50dcnv2_fpnc.py ADDED Viewed

	@@ -0,0 +1,23 @@

+model = dict(
+    type='DBNet',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=False,
+        style='pytorch',
+        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+        stage_with_dcn=(False, True, True, True)),
+    neck=dict(
+        type='FPNC', in_channels=[256, 512, 1024, 2048], lateral_channels=256),
+    bbox_head=dict(
+        type='DBHead',
+        in_channels=256,
+        loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True),
+        postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')),
+    train_cfg=None,
+    test_cfg=None)

configs/_base_/det_models/dbnetpp_r50dcnv2_fpnc.py ADDED Viewed

	@@ -0,0 +1,28 @@

+model = dict(
+    type='DBNet',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=False,
+        style='pytorch',
+        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+        stage_with_dcn=(False, True, True, True)),
+    neck=dict(
+        type='FPNC',
+        in_channels=[256, 512, 1024, 2048],
+        lateral_channels=256,
+        asf_cfg=dict(attention_type='ScaleChannelSpatial')),
+    bbox_head=dict(
+        type='DBHead',
+        in_channels=256,
+        loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True),
+        postprocessor=dict(
+            type='DBPostprocessor', text_repr_type='quad',
+            epsilon_ratio=0.002)),
+    train_cfg=None,
+    test_cfg=None)

configs/_base_/det_models/drrg_r50_fpn_unet.py ADDED Viewed

	@@ -0,0 +1,21 @@

+model = dict(
+    type='DRRG',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+        norm_eval=True,
+        style='caffe'),
+    neck=dict(
+        type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
+    bbox_head=dict(
+        type='DRRGHead',
+        in_channels=32,
+        text_region_thr=0.3,
+        center_region_thr=0.4,
+        loss=dict(type='DRRGLoss'),
+        postprocessor=dict(type='DRRGPostprocessor', link_thr=0.80)))

configs/_base_/det_models/fcenet_r50_fpn.py ADDED Viewed

	@@ -0,0 +1,33 @@

+model = dict(
+    type='FCENet',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+        norm_eval=False,
+        style='pytorch'),
+    neck=dict(
+        type='mmdet.FPN',
+        in_channels=[512, 1024, 2048],
+        out_channels=256,
+        add_extra_convs='on_output',
+        num_outs=3,
+        relu_before_extra_convs=True,
+        act_cfg=None),
+    bbox_head=dict(
+        type='FCEHead',
+        in_channels=256,
+        scales=(8, 16, 32),
+        fourier_degree=5,
+        loss=dict(type='FCELoss', num_sample=50),
+        postprocessor=dict(
+            type='FCEPostprocessor',
+            text_repr_type='quad',
+            num_reconstr_points=50,
+            alpha=1.2,
+            beta=1.0,
+            score_thr=0.3)))

configs/_base_/det_models/fcenet_r50dcnv2_fpn.py ADDED Viewed

	@@ -0,0 +1,35 @@

+model = dict(
+    type='FCENet',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=True,
+        style='pytorch',
+        dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+        stage_with_dcn=(False, True, True, True)),
+    neck=dict(
+        type='mmdet.FPN',
+        in_channels=[512, 1024, 2048],
+        out_channels=256,
+        add_extra_convs='on_output',
+        num_outs=3,
+        relu_before_extra_convs=True,
+        act_cfg=None),
+    bbox_head=dict(
+        type='FCEHead',
+        in_channels=256,
+        scales=(8, 16, 32),
+        fourier_degree=5,
+        loss=dict(type='FCELoss', num_sample=50),
+        postprocessor=dict(
+            type='FCEPostprocessor',
+            text_repr_type='poly',
+            num_reconstr_points=50,
+            alpha=1.0,
+            beta=2.0,
+            score_thr=0.3)))

configs/_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem.py ADDED Viewed

	@@ -0,0 +1,126 @@

+# model settings
+model = dict(
+    type='OCRMaskRCNN',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+        norm_eval=True,
+        style='pytorch'),
+    neck=dict(
+        type='mmdet.FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=5),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        anchor_generator=dict(
+            type='AnchorGenerator',
+            scales=[4],
+            ratios=[0.17, 0.44, 1.13, 2.90, 7.46],
+            strides=[4, 8, 16, 32, 64]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[.0, .0, .0, .0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
+        loss_cls=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+    roi_head=dict(
+        type='StandardRoIHead',
+        bbox_roi_extractor=dict(
+            type='SingleRoIExtractor',
+            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+            out_channels=256,
+            featmap_strides=[4, 8, 16, 32]),
+        bbox_head=dict(
+            type='Shared2FCBBoxHead',
+            in_channels=256,
+            fc_out_channels=1024,
+            roi_feat_size=7,
+            num_classes=1,
+            bbox_coder=dict(
+                type='DeltaXYWHBBoxCoder',
+                target_means=[0., 0., 0., 0.],
+                target_stds=[0.1, 0.1, 0.2, 0.2]),
+            reg_class_agnostic=False,
+            loss_cls=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+        mask_roi_extractor=dict(
+            type='SingleRoIExtractor',
+            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
+            out_channels=256,
+            featmap_strides=[4, 8, 16, 32]),
+        mask_head=dict(
+            type='FCNMaskHead',
+            num_convs=4,
+            in_channels=256,
+            conv_out_channels=256,
+            num_classes=1,
+            loss_mask=dict(
+                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
+    # model training and testing settings
+    train_cfg=dict(
+        rpn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.3,
+                min_pos_iou=0.3,
+                match_low_quality=True,
+                ignore_iof_thr=-1,
+                gpu_assign_thr=50),
+            sampler=dict(
+                type='RandomSampler',
+                num=256,
+                pos_fraction=0.5,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=False),
+            allowed_border=-1,
+            pos_weight=-1,
+            debug=False),
+        rpn_proposal=dict(
+            nms_across_levels=False,
+            nms_pre=2000,
+            nms_post=1000,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.5,
+                match_low_quality=True,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='OHEMSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=28,
+            pos_weight=-1,
+            debug=False)),
+    test_cfg=dict(
+        rpn=dict(
+            nms_across_levels=False,
+            nms_pre=1000,
+            nms_post=1000,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            score_thr=0.05,
+            nms=dict(type='nms', iou_threshold=0.5),
+            max_per_img=100,
+            mask_thr_binary=0.5)))

configs/_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem_poly.py ADDED Viewed

	@@ -0,0 +1,126 @@

+# model settings: OCRMaskRCNN (ResNet-50 backbone, FPN neck, OHEM RCNN sampler), text_repr_type='poly'
+model = dict(
+    type='OCRMaskRCNN',
+    text_repr_type='poly',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=True,
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+        style='pytorch'),
+    neck=dict(
+        type='mmdet.FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=5),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        anchor_generator=dict(
+            type='AnchorGenerator',
+            scales=[4],
+            ratios=[0.17, 0.44, 1.13, 2.90, 7.46],
+            strides=[4, 8, 16, 32, 64]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[.0, .0, .0, .0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
+        loss_cls=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+    roi_head=dict(
+        type='StandardRoIHead',
+        bbox_roi_extractor=dict(
+            type='SingleRoIExtractor',
+            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),  # was deprecated alias `sample_num`
+            out_channels=256,
+            featmap_strides=[4, 8, 16, 32]),
+        bbox_head=dict(
+            type='Shared2FCBBoxHead',
+            in_channels=256,
+            fc_out_channels=1024,
+            roi_feat_size=7,
+            num_classes=80,  # NOTE(review): ocr_mask_rcnn_r50_fpn_ohem.py uses num_classes=1 in both heads - confirm 80 is intended
+            bbox_coder=dict(
+                type='DeltaXYWHBBoxCoder',
+                target_means=[0., 0., 0., 0.],
+                target_stds=[0.1, 0.1, 0.2, 0.2]),
+            reg_class_agnostic=False,
+            loss_cls=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+        mask_roi_extractor=dict(
+            type='SingleRoIExtractor',
+            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),  # was deprecated alias `sample_num`
+            out_channels=256,
+            featmap_strides=[4, 8, 16, 32]),
+        mask_head=dict(
+            type='FCNMaskHead',
+            num_convs=4,
+            in_channels=256,
+            conv_out_channels=256,
+            num_classes=80,
+            loss_mask=dict(
+                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
+    # model training and testing settings
+    train_cfg=dict(
+        rpn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.3,
+                min_pos_iou=0.3,
+                match_low_quality=True,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=256,
+                pos_fraction=0.5,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=False),
+            allowed_border=-1,
+            pos_weight=-1,
+            debug=False),
+        rpn_proposal=dict(
+            nms_across_levels=False,
+            nms_pre=2000,
+            nms_post=1000,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.5,
+                match_low_quality=True,
+                ignore_iof_thr=-1,
+                gpu_assign_thr=50),
+            sampler=dict(
+                type='OHEMSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            mask_size=28,
+            pos_weight=-1,
+            debug=False)),
+    test_cfg=dict(
+        rpn=dict(
+            nms_across_levels=False,
+            nms_pre=1000,
+            nms_post=1000,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            score_thr=0.05,
+            nms=dict(type='nms', iou_threshold=0.5),
+            max_per_img=100,
+            mask_thr_binary=0.5)))

configs/_base_/det_models/panet_r18_fpem_ffm.py ADDED Viewed

	@@ -0,0 +1,43 @@

+model_poly = dict(
+    type='PANet',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=18,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
+        norm_eval=True,
+        style='caffe'),
+    neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
+    bbox_head=dict(
+        type='PANHead',
+        in_channels=[128, 128, 128, 128],
+        out_channels=6,
+        loss=dict(type='PANLoss'),
+        postprocessor=dict(type='PANPostprocessor', text_repr_type='poly')),
+    train_cfg=None,
+    test_cfg=None)
+model_quad = dict(
+    type='PANet',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=18,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
+        norm_eval=True,
+        style='caffe'),
+    neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
+    bbox_head=dict(
+        type='PANHead',
+        in_channels=[128, 128, 128, 128],
+        out_channels=6,
+        loss=dict(type='PANLoss'),
+        postprocessor=dict(type='PANPostprocessor', text_repr_type='quad')),
+    train_cfg=None,
+    test_cfg=None)

configs/_base_/det_models/panet_r50_fpem_ffm.py ADDED Viewed

	@@ -0,0 +1,21 @@

+model = dict(  # PANet text detector: ResNet-50 backbone, FPEM_FFM neck, PANHead with polygon output
+    type='PANet',
+    backbone=dict(  # pretrained weights are declared through init_cfg instead of the deprecated top-level `pretrained` key
+        type='mmdet.ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+        norm_eval=True,
+        style='caffe'),
+    neck=dict(type='FPEM_FFM', in_channels=[256, 512, 1024, 2048]),
+    bbox_head=dict(
+        type='PANHead',
+        in_channels=[128, 128, 128, 128],
+        out_channels=6,
+        loss=dict(type='PANLoss', speedup_bbox_thr=32),
+        postprocessor=dict(type='PANPostprocessor', text_repr_type='poly')),
+    train_cfg=None,
+    test_cfg=None)

configs/_base_/det_models/psenet_r50_fpnf.py ADDED Viewed

	@@ -0,0 +1,51 @@

+model_poly = dict(
+    type='PSENet',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+        norm_eval=True,
+        style='caffe'),
+    neck=dict(
+        type='FPNF',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        fusion_type='concat'),
+    bbox_head=dict(
+        type='PSEHead',
+        in_channels=[256],
+        out_channels=7,
+        loss=dict(type='PSELoss'),
+        postprocessor=dict(type='PSEPostprocessor', text_repr_type='poly')),
+    train_cfg=None,
+    test_cfg=None)
+model_quad = dict(
+    type='PSENet',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+        norm_eval=True,
+        style='caffe'),
+    neck=dict(
+        type='FPNF',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        fusion_type='concat'),
+    bbox_head=dict(
+        type='PSEHead',
+        in_channels=[256],
+        out_channels=7,
+        loss=dict(type='PSELoss'),
+        postprocessor=dict(type='PSEPostprocessor', text_repr_type='quad')),
+    train_cfg=None,
+    test_cfg=None)

configs/_base_/det_models/textsnake_r50_fpn_unet.py ADDED Viewed

	@@ -0,0 +1,22 @@

+model = dict(
+    type='TextSnake',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+        norm_eval=True,
+        style='caffe'),
+    neck=dict(
+        type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
+    bbox_head=dict(
+        type='TextSnakeHead',
+        in_channels=32,
+        loss=dict(type='TextSnakeLoss'),
+        postprocessor=dict(
+            type='TextSnakePostprocessor', text_repr_type='poly')),
+    train_cfg=None,
+    test_cfg=None)

configs/_base_/det_pipelines/dbnet_pipeline.py ADDED Viewed

	@@ -0,0 +1,88 @@

+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline_r18 = [
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='LoadTextAnnotations',
+        with_bbox=True,
+        with_mask=True,
+        poly2mask=False),
+    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(
+        type='ImgAug',
+        args=[['Fliplr', 0.5],
+              dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
+    dict(type='EastRandomCrop', target_size=(640, 640)),
+    dict(type='DBNetTargets', shrink_ratio=0.4),
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='CustomFormatBundle',
+        keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
+        visualize=dict(flag=False, boundary_key='gt_shrink')),
+    dict(
+        type='Collect',
+        keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
+]
+test_pipeline_1333_736 = [
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 736),  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+# for dbnet_r50dcnv2_fpnc
+img_norm_cfg_r50dcnv2 = dict(
+    mean=[122.67891434, 116.66876762, 104.00698793],
+    std=[58.395, 57.12, 57.375],
+    to_rgb=True)
+train_pipeline_r50dcnv2 = [
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='LoadTextAnnotations',
+        with_bbox=True,
+        with_mask=True,
+        poly2mask=False),
+    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
+    dict(type='Normalize', **img_norm_cfg_r50dcnv2),
+    dict(
+        type='ImgAug',
+        args=[['Fliplr', 0.5],
+              dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
+    dict(type='EastRandomCrop', target_size=(640, 640)),
+    dict(type='DBNetTargets', shrink_ratio=0.4),
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='CustomFormatBundle',
+        keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
+        visualize=dict(flag=False, boundary_key='gt_shrink')),
+    dict(
+        type='Collect',
+        keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
+]
+test_pipeline_4068_1024 = [
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(4068, 1024),  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='Normalize', **img_norm_cfg_r50dcnv2),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]

configs/_base_/det_pipelines/drrg_pipeline.py ADDED Viewed

	@@ -0,0 +1,60 @@

+img_norm_cfg = dict(  # keyword arguments unpacked into each Normalize step in this file
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [  # ordered transform configs for DRRG training
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='LoadTextAnnotations',
+        with_bbox=True,
+        with_mask=True,
+        poly2mask=False),
+    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='RandomScaling', size=800, scale=(0.75, 2.5)),
+    dict(
+        type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
+    dict(
+        type='RandomCropPolyInstances',
+        instance_key='gt_masks',
+        crop_ratio=0.8,
+        min_side_ratio=0.3),
+    dict(
+        type='RandomRotatePolyInstances',
+        rotate_ratio=0.5,
+        max_angle=60,
+        pad_with_fixed_color=False),
+    dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
+    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
+    dict(type='DRRGTargets'),  # presumably produces the gt_* keys bundled below; confirm against the transform
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='CustomFormatBundle',
+        keys=[
+            'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
+            'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
+            'gt_cos_map', 'gt_comp_attribs'
+        ],
+        visualize=dict(flag=False, boundary_key='gt_text_mask')),
+    dict(
+        type='Collect',
+        keys=[  # 'img' plus the same eight gt_* keys listed in CustomFormatBundle
+            'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
+            'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
+            'gt_cos_map', 'gt_comp_attribs'
+        ])
+]
+test_pipeline = [  # DRRG test-time transform configs at a single scale, no flipping
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1024, 640),  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),  # only 'img' is collected
+        ])
+]

configs/_base_/det_pipelines/fcenet_pipeline.py ADDED Viewed

	@@ -0,0 +1,118 @@

+img_norm_cfg = dict(  # keyword arguments unpacked into each Normalize step in this file
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+# Settings for ICDAR2015
+leval_prop_range_icdar2015 = ((0, 0.4), (0.3, 0.7), (0.6, 1.0))  # NOTE(review): 'leval' looks like a typo for 'level'; name kept as-is since other configs may reference it
+train_pipeline_icdar2015 = [  # ordered transform configs for FCENet training
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='LoadTextAnnotations',
+        with_bbox=True,
+        with_mask=True,
+        poly2mask=False),
+    dict(
+        type='ColorJitter',
+        brightness=32.0 / 255,
+        saturation=0.5,
+        contrast=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)),  # scale evaluates to (0.75, 2.5)
+    dict(
+        type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
+    dict(
+        type='RandomCropPolyInstances',
+        instance_key='gt_masks',
+        crop_ratio=0.8,
+        min_side_ratio=0.3),
+    dict(
+        type='RandomRotatePolyInstances',
+        rotate_ratio=0.5,
+        max_angle=30,
+        pad_with_fixed_color=False),
+    dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
+    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='FCENetTargets',
+        fourier_degree=5,
+        level_proportion_range=leval_prop_range_icdar2015),  # three ranges; presumably one per p3/p4/p5 map, confirm
+    dict(
+        type='CustomFormatBundle',
+        keys=['p3_maps', 'p4_maps', 'p5_maps'],
+        visualize=dict(flag=False, boundary_key=None)),
+    dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps'])
+]
+img_scale_icdar2015 = (2260, 2260)  # passed as img_scale to MultiScaleFlipAug below
+test_pipeline_icdar2015 = [  # FCENet test-time transform configs for ICDAR2015, no flipping
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale_icdar2015,  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),  # only 'img' is collected
+        ])
+]
+# Settings for CTW1500
+leval_prop_range_ctw1500 = ((0, 0.25), (0.2, 0.65), (0.55, 1.0))  # NOTE(review): 'leval' looks like a typo for 'level'; name kept as-is since other configs may reference it
+train_pipeline_ctw1500 = [  # same steps as the ICDAR2015 training pipeline; only level_proportion_range differs
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='LoadTextAnnotations',
+        with_bbox=True,
+        with_mask=True,
+        poly2mask=False),
+    dict(
+        type='ColorJitter',
+        brightness=32.0 / 255,
+        saturation=0.5,
+        contrast=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)),  # scale evaluates to (0.75, 2.5)
+    dict(
+        type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
+    dict(
+        type='RandomCropPolyInstances',
+        instance_key='gt_masks',
+        crop_ratio=0.8,
+        min_side_ratio=0.3),
+    dict(
+        type='RandomRotatePolyInstances',
+        rotate_ratio=0.5,
+        max_angle=30,
+        pad_with_fixed_color=False),
+    dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
+    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='FCENetTargets',
+        fourier_degree=5,
+        level_proportion_range=leval_prop_range_ctw1500),  # three ranges; presumably one per p3/p4/p5 map, confirm
+    dict(
+        type='CustomFormatBundle',
+        keys=['p3_maps', 'p4_maps', 'p5_maps'],
+        visualize=dict(flag=False, boundary_key=None)),
+    dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps'])
+]
+img_scale_ctw1500 = (1080, 736)  # passed as img_scale to MultiScaleFlipAug below
+test_pipeline_ctw1500 = [  # FCENet test-time transform configs for CTW1500, no flipping
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale_ctw1500,  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),  # only 'img' is collected
+        ])
+]

configs/_base_/det_pipelines/maskrcnn_pipeline.py ADDED Viewed

	@@ -0,0 +1,57 @@

+img_norm_cfg = dict(  # keyword arguments unpacked into each Normalize step in this file
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [  # ordered transform configs for Mask R-CNN text-detection training
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+    dict(
+        type='ScaleAspectJitter',
+        img_scale=None,  # no fixed scale list; scale_range is supplied instead
+        keep_ratio=False,
+        resize_type='indep_sample_in_range',
+        scale_range=(640, 2560)),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(
+        type='RandomCropInstances',
+        target_size=(640, 640),
+        mask_type='union_all',
+        instance_key='gt_masks'),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+# Test settings for CTW1500
+img_scale_ctw1500 = (1600, 1600)  # passed as img_scale to MultiScaleFlipAug below
+test_pipeline_ctw1500 = [
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale_ctw1500,  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),  # no flip_ratio given and flip=False above; presumably a no-op here, confirm
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),  # NOTE(review): no Pad step here, unlike the training pipeline
+            dict(type='Collect', keys=['img']),
+        ])
+]
+# Test settings for ICDAR2015
+img_scale_icdar2015 = (1920, 1920)  # passed as img_scale to MultiScaleFlipAug below
+test_pipeline_icdar2015 = [
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale_icdar2015,  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),  # no flip_ratio given and flip=False above; presumably a no-op here, confirm
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),  # NOTE(review): no Pad step here, unlike the training pipeline
+            dict(type='Collect', keys=['img']),
+        ])
+]

configs/_base_/det_pipelines/panet_pipeline.py ADDED Viewed

	@@ -0,0 +1,156 @@

+img_norm_cfg = dict(  # keyword arguments unpacked into each Normalize step in this file
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+# Settings for CTW1500
+img_scale_train_ctw1500 = [(3000, 640)]  # img_scale for ScaleAspectJitter
+shrink_ratio_train_ctw1500 = (1.0, 0.7)  # shrink_ratio for PANetTargets
+target_size_train_ctw1500 = (640, 640)  # target_size for RandomCropInstances
+train_pipeline_ctw1500 = [  # ordered transform configs for PANet training on CTW1500
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='LoadTextAnnotations',
+        with_bbox=True,
+        with_mask=True,
+        poly2mask=False),
+    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(
+        type='ScaleAspectJitter',
+        img_scale=img_scale_train_ctw1500,
+        ratio_range=(0.7, 1.3),
+        aspect_ratio_range=(0.9, 1.1),
+        multiscale_mode='value',
+        keep_ratio=False),
+    # shrink_ratio is listed from large to small; the first value must be 1.0
+    dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_ctw1500),
+    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
+    dict(type='RandomRotateTextDet'),
+    dict(
+        type='RandomCropInstances',
+        target_size=target_size_train_ctw1500,
+        instance_key='gt_kernels'),
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='CustomFormatBundle',
+        keys=['gt_kernels', 'gt_mask'],
+        visualize=dict(flag=False, boundary_key='gt_kernels')),
+    dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
+]
+img_scale_test_ctw1500 = (3000, 640)  # passed as img_scale to MultiScaleFlipAug below
+test_pipeline_ctw1500 = [  # PANet test-time transform configs for CTW1500, no flipping
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale_test_ctw1500,  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),  # only 'img' is collected
+        ])
+]
+# Settings for ICDAR2015
+img_scale_train_icdar2015 = [(3000, 736)]  # img_scale for ScaleAspectJitter
+shrink_ratio_train_icdar2015 = (1.0, 0.5)  # shrink_ratio for PANetTargets
+target_size_train_icdar2015 = (736, 736)  # target_size for RandomCropInstances
+train_pipeline_icdar2015 = [  # ordered transform configs for PANet training on ICDAR2015
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='LoadTextAnnotations',
+        with_bbox=True,
+        with_mask=True,
+        poly2mask=False),
+    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(
+        type='ScaleAspectJitter',
+        img_scale=img_scale_train_icdar2015,
+        ratio_range=(0.7, 1.3),
+        aspect_ratio_range=(0.9, 1.1),
+        multiscale_mode='value',
+        keep_ratio=False),
+    dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_icdar2015),
+    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
+    dict(type='RandomRotateTextDet'),
+    dict(
+        type='RandomCropInstances',
+        target_size=target_size_train_icdar2015,
+        instance_key='gt_kernels'),
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='CustomFormatBundle',
+        keys=['gt_kernels', 'gt_mask'],
+        visualize=dict(flag=False, boundary_key='gt_kernels')),
+    dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
+]
+img_scale_test_icdar2015 = (1333, 736)  # passed as img_scale to MultiScaleFlipAug below
+test_pipeline_icdar2015 = [  # PANet test-time transform configs for ICDAR2015, no flipping
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale_test_icdar2015,  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),  # only 'img' is collected
+        ])
+]
+# Settings for ICDAR2017
+img_scale_train_icdar2017 = [(3000, 800)]  # img_scale for ScaleAspectJitter
+shrink_ratio_train_icdar2017 = (1.0, 0.5)  # shrink_ratio for PANetTargets
+target_size_train_icdar2017 = (800, 800)  # target_size for RandomCropInstances
+train_pipeline_icdar2017 = [  # ordered transform configs for PANet training on ICDAR2017
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='LoadTextAnnotations',
+        with_bbox=True,
+        with_mask=True,
+        poly2mask=False),
+    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(
+        type='ScaleAspectJitter',
+        img_scale=img_scale_train_icdar2017,
+        ratio_range=(0.7, 1.3),
+        aspect_ratio_range=(0.9, 1.1),
+        multiscale_mode='value',
+        keep_ratio=False),
+    dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_icdar2017),
+    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
+    dict(type='RandomRotateTextDet'),
+    dict(
+        type='RandomCropInstances',
+        target_size=target_size_train_icdar2017,
+        instance_key='gt_kernels'),
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='CustomFormatBundle',
+        keys=['gt_kernels', 'gt_mask'],
+        visualize=dict(flag=False, boundary_key='gt_kernels')),
+    dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
+]
+img_scale_test_icdar2017 = (1333, 800)  # passed as img_scale to MultiScaleFlipAug below
+test_pipeline_icdar2017 = [  # PANet test-time transform configs for ICDAR2017, no flipping
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale_test_icdar2017,  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),  # only 'img' is collected
+        ])
+]

configs/_base_/det_pipelines/psenet_pipeline.py ADDED Viewed

	@@ -0,0 +1,70 @@

+img_norm_cfg = dict(  # keyword arguments unpacked into each Normalize step in this file
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [  # ordered transform configs for PSENet training
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='LoadTextAnnotations',
+        with_bbox=True,
+        with_mask=True,
+        poly2mask=False),
+    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(
+        type='ScaleAspectJitter',
+        img_scale=[(3000, 736)],
+        ratio_range=(0.5, 3),
+        aspect_ratio_range=(1, 1),  # lower and upper bound are equal
+        multiscale_mode='value',
+        long_size_bound=1280,
+        short_size_bound=640,
+        resize_type='long_short_bound',
+        keep_ratio=False),
+    dict(type='PSENetTargets'),  # presumably produces gt_kernels / gt_mask used below; confirm
+    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
+    dict(type='RandomRotateTextDet'),
+    dict(
+        type='RandomCropInstances',
+        target_size=(640, 640),
+        instance_key='gt_kernels'),
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='CustomFormatBundle',
+        keys=['gt_kernels', 'gt_mask'],
+        visualize=dict(flag=False, boundary_key='gt_kernels')),
+    dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
+]
+# Test settings for CTW1500
+img_scale_test_ctw1500 = (1280, 1280)  # passed as img_scale to MultiScaleFlipAug below
+test_pipeline_ctw1500 = [
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale_test_ctw1500,  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),  # only 'img' is collected
+        ])
+]
+# Test settings for ICDAR2015
+img_scale_test_icdar2015 = (2240, 2240)  # passed as img_scale to MultiScaleFlipAug below
+test_pipeline_icdar2015 = [
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale_test_icdar2015,  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),  # only 'img' is collected
+        ])
+]

configs/_base_/det_pipelines/textsnake_pipeline.py ADDED Viewed

	@@ -0,0 +1,65 @@

+img_norm_cfg = dict(  # keyword arguments unpacked into each Normalize step in this file
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [  # ordered transform configs for TextSnake training
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='LoadTextAnnotations',
+        with_bbox=True,
+        with_mask=True,
+        poly2mask=False),
+    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(
+        type='RandomCropPolyInstances',
+        instance_key='gt_masks',
+        crop_ratio=0.65,
+        min_side_ratio=0.3),
+    dict(
+        type='RandomRotatePolyInstances',
+        rotate_ratio=0.5,
+        max_angle=20,
+        pad_with_fixed_color=False),
+    dict(
+        type='ScaleAspectJitter',
+        img_scale=[(3000, 736)],  # unused
+        ratio_range=(0.7, 1.3),
+        aspect_ratio_range=(0.9, 1.1),
+        multiscale_mode='value',
+        long_size_bound=800,
+        short_size_bound=480,
+        resize_type='long_short_bound',
+        keep_ratio=False),
+    dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
+    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
+    dict(type='TextSnakeTargets'),  # presumably produces the gt_* keys bundled below; confirm against the transform
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='CustomFormatBundle',
+        keys=[
+            'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
+            'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
+        ],
+        visualize=dict(flag=False, boundary_key='gt_text_mask')),
+    dict(
+        type='Collect',
+        keys=[  # 'img' plus the same six gt_* keys listed in CustomFormatBundle
+            'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
+            'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
+        ])
+]
+test_pipeline = [  # TextSnake test-time transform configs at a single scale, no flipping
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 736),  # used by Resize
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),  # only 'img' is collected
+        ])
+]

configs/_base_/recog_datasets/MJ_train.py ADDED Viewed

	@@ -0,0 +1,21 @@

+# Text Recognition Training set, including:
+# Synthetic Datasets: Syn90k
+train_root = 'data/mixture/Syn90k'  # base directory for both the images and the label file
+train_img_prefix = f'{train_root}/mnt/ramdisk/max/90kDICT32px'
+train_ann_file = f'{train_root}/label.lmdb'
+train = dict(  # single OCRDataset config reading annotations from an lmdb file
+    type='OCRDataset',
+    img_prefix=train_img_prefix,
+    ann_file=train_ann_file,
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=1,
+        file_format='lmdb',
+        parser=dict(type='LineJsonParser', keys=['filename', 'text'])),
+    pipeline=None,  # left unset here; presumably filled in by the config that includes this file
+    test_mode=False)
+train_list = [train]  # list form exported alongside the single dataset dict

configs/_base_/recog_datasets/ST_MJ_alphanumeric_train.py ADDED Viewed

	@@ -0,0 +1,31 @@

+# Text Recognition Training set, including:
+# Synthetic Datasets: SynthText, Syn90k
+# Both annotations are filtered so that
+# only alphanumeric terms are left
+train_root = 'data/mixture'  # common base directory for both datasets
+train_img_prefix1 = f'{train_root}/Syn90k/mnt/ramdisk/max/90kDICT32px'
+train_ann_file1 = f'{train_root}/Syn90k/label.lmdb'
+train1 = dict(  # Syn90k dataset config
+    type='OCRDataset',
+    img_prefix=train_img_prefix1,
+    ann_file=train_ann_file1,
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=1,
+        file_format='lmdb',
+        parser=dict(type='LineJsonParser', keys=['filename', 'text'])),
+    pipeline=None,  # left unset here; presumably filled in by the config that includes this file
+    test_mode=False)
+train_img_prefix2 = f'{train_root}/SynthText/' + \
+    'synthtext/SynthText_patch_horizontal'
+train_ann_file2 = f'{train_root}/SynthText/alphanumeric_label.lmdb'
+train2 = {key: value for key, value in train1.items()}  # shallow copy: the nested 'loader' dict is shared with train1
+train2['img_prefix'] = train_img_prefix2  # override the two SynthText-specific paths
+train2['ann_file'] = train_ann_file2
+train_list = [train1, train2]

configs/_base_/recog_datasets/ST_MJ_train.py ADDED Viewed

	@@ -0,0 +1,29 @@

+# Text Recognition Training set, including:
+# Synthetic Datasets: SynthText, Syn90k
+train_root = 'data/mixture'  # common base directory for both datasets
+train_img_prefix1 = f'{train_root}/Syn90k/mnt/ramdisk/max/90kDICT32px'
+train_ann_file1 = f'{train_root}/Syn90k/label.lmdb'
+train1 = dict(  # Syn90k dataset config
+    type='OCRDataset',
+    img_prefix=train_img_prefix1,
+    ann_file=train_ann_file1,
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=1,
+        file_format='lmdb',
+        parser=dict(type='LineJsonParser', keys=['filename', 'text'])),
+    pipeline=None,  # left unset here; presumably filled in by the config that includes this file
+    test_mode=False)
+train_img_prefix2 = f'{train_root}/SynthText/' + \
+    'synthtext/SynthText_patch_horizontal'
+train_ann_file2 = f'{train_root}/SynthText/label.lmdb'
+train2 = {key: value for key, value in train1.items()}  # shallow copy: the nested 'loader' dict is shared with train1
+train2['img_prefix'] = train_img_prefix2  # override the two SynthText-specific paths
+train2['ann_file'] = train_ann_file2
+train_list = [train1, train2]

configs/_base_/recog_datasets/ST_SA_MJ_real_train.py ADDED Viewed

	@@ -0,0 +1,81 @@

+# Text Recognition Training set, including:
+# Synthetic Datasets: SynthText, SynthAdd, Syn90k
+# Real Dataset: IC11, IC13, IC15, COCO-Test, IIIT5k
+train_prefix = 'data/mixture'  # common base directory for all eight datasets
+train_img_prefix1 = f'{train_prefix}/icdar_2011'  # prefixes 1-5: the real datasets
+train_img_prefix2 = f'{train_prefix}/icdar_2013'
+train_img_prefix3 = f'{train_prefix}/icdar_2015'
+train_img_prefix4 = f'{train_prefix}/coco_text'
+train_img_prefix5 = f'{train_prefix}/IIIT5K'
+train_img_prefix6 = f'{train_prefix}/SynthText_Add'  # prefixes 6-8: the synthetic datasets
+train_img_prefix7 = f'{train_prefix}/SynthText'
+train_img_prefix8 = f'{train_prefix}/Syn90k'
+train_ann_file1 = f'{train_prefix}/icdar_2011/train_label.txt',
+train_ann_file2 = f'{train_prefix}/icdar_2013/train_label.txt',
+train_ann_file3 = f'{train_prefix}/icdar_2015/train_label.txt',
+train_ann_file4 = f'{train_prefix}/coco_text/train_label.txt',
+train_ann_file5 = f'{train_prefix}/IIIT5K/train_label.txt',
+train_ann_file6 = f'{train_prefix}/SynthText_Add/label.txt',
+train_ann_file7 = f'{train_prefix}/SynthText/shuffle_labels.txt',
+train_ann_file8 = f'{train_prefix}/Syn90k/shuffle_labels.txt'
+train1 = dict(  # template for the five real datasets (train1..train5)
+    type='OCRDataset',
+    img_prefix=train_img_prefix1,
+    ann_file=train_ann_file1,
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=20,  # real-dataset template uses repeat=20; the synthetic template below uses repeat=1
+        file_format='txt',
+        parser=dict(
+            type='LineStrParser',
+            keys=['filename', 'text'],
+            keys_idx=[0, 1],
+            separator=' ')),
+    pipeline=None,  # left unset here; presumably filled in by the config that includes this file
+    test_mode=False)
+train2 = {key: value for key, value in train1.items()}  # shallow copy: the nested 'loader' dict is shared with train1
+train2['img_prefix'] = train_img_prefix2
+train2['ann_file'] = train_ann_file2
+train3 = {key: value for key, value in train1.items()}  # shallow copy of train1
+train3['img_prefix'] = train_img_prefix3
+train3['ann_file'] = train_ann_file3
+train4 = {key: value for key, value in train1.items()}  # shallow copy of train1
+train4['img_prefix'] = train_img_prefix4
+train4['ann_file'] = train_ann_file4
+train5 = {key: value for key, value in train1.items()}  # shallow copy of train1
+train5['img_prefix'] = train_img_prefix5
+train5['ann_file'] = train_ann_file5
+train6 = dict(  # template for the three synthetic datasets (train6..train8)
+    type='OCRDataset',
+    img_prefix=train_img_prefix6,
+    ann_file=train_ann_file6,
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=1,
+        file_format='txt',
+        parser=dict(
+            type='LineStrParser',
+            keys=['filename', 'text'],
+            keys_idx=[0, 1],
+            separator=' ')),
+    pipeline=None,
+    test_mode=False)
+train7 = {key: value for key, value in train6.items()}  # shallow copy: the nested 'loader' dict is shared with train6
+train7['img_prefix'] = train_img_prefix7
+train7['ann_file'] = train_ann_file7
+train8 = {key: value for key, value in train6.items()}  # shallow copy of train6
+train8['img_prefix'] = train_img_prefix8
+train8['ann_file'] = train_ann_file8
+train_list = [train1, train2, train3, train4, train5, train6, train7, train8]

configs/_base_/recog_datasets/ST_SA_MJ_train.py ADDED Viewed

	@@ -0,0 +1,48 @@

+# Text Recognition Training set, including:
+# Synthetic Datasets: SynthText, SynthAdd, Syn90k
+train_root = 'data/mixture'  # common base directory for all three datasets
+train_img_prefix1 = f'{train_root}/Syn90k/mnt/ramdisk/max/90kDICT32px'
+train_ann_file1 = f'{train_root}/Syn90k/label.lmdb'
+train1 = dict(  # Syn90k dataset config (lmdb annotations)
+    type='OCRDataset',
+    img_prefix=train_img_prefix1,
+    ann_file=train_ann_file1,
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=1,
+        file_format='lmdb',
+        parser=dict(type='LineJsonParser', keys=['filename', 'text'])),
+    pipeline=None,  # left unset here; presumably filled in by the config that includes this file
+    test_mode=False)
+train_img_prefix2 = f'{train_root}/SynthText/' + \
+    'synthtext/SynthText_patch_horizontal'
+train_ann_file2 = f'{train_root}/SynthText/label.lmdb'
+train_img_prefix3 = f'{train_root}/SynthText_Add'
+train_ann_file3 = f'{train_root}/SynthText_Add/label.txt'
+train2 = {key: value for key, value in train1.items()}  # shallow copy: the nested 'loader' dict is shared with train1
+train2['img_prefix'] = train_img_prefix2  # override the two SynthText-specific paths
+train2['ann_file'] = train_ann_file2
+train3 = dict(  # SynthText_Add dataset config; txt annotations, so it gets its own loader/parser
+    type='OCRDataset',
+    img_prefix=train_img_prefix3,
+    ann_file=train_ann_file3,
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=1,
+        file_format='txt',
+        parser=dict(
+            type='LineStrParser',
+            keys=['filename', 'text'],
+            keys_idx=[0, 1],
+            separator=' ')),
+    pipeline=None,
+    test_mode=False)
+train_list = [train1, train2, train3]

configs/_base_/recog_datasets/ST_charbox_train.py ADDED Viewed

	@@ -0,0 +1,23 @@

+# Text Recognition Training set, including:
+# Synthetic Datasets: SynthText (with character level boxes)
+train_img_root = 'data/mixture'  # base directory for both the images and the annotation file
+train_img_prefix = f'{train_img_root}/SynthText'
+train_ann_file = f'{train_img_root}/SynthText/instances_train.txt'
+train = dict(  # OCRSegDataset config: parser keys include 'annotations' in addition to file name and text
+    type='OCRSegDataset',
+    img_prefix=train_img_prefix,
+    ann_file=train_ann_file,
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=1,
+        file_format='txt',
+        parser=dict(
+            type='LineJsonParser', keys=['file_name', 'annotations', 'text'])),
+    pipeline=None,  # left unset here; presumably filled in by the config that includes this file
+    test_mode=False)
+train_list = [train]  # list form exported alongside the single dataset dict

configs/_base_/recog_datasets/academic_test.py ADDED Viewed

	@@ -0,0 +1,57 @@

+# Text Recognition Testing set, including:
+# Regular Datasets: IIIT5K, SVT, IC13
+# Irregular Datasets: IC15, SVTP, CT80
+test_root = 'data/mixture'  # common base directory for all six test sets
+test_img_prefix1 = f'{test_root}/IIIT5K/'
+test_img_prefix2 = f'{test_root}/svt/'
+test_img_prefix3 = f'{test_root}/icdar_2013/'
+test_img_prefix4 = f'{test_root}/icdar_2015/'
+test_img_prefix5 = f'{test_root}/svtp/'
+test_img_prefix6 = f'{test_root}/ct80/'
+test_ann_file1 = f'{test_root}/IIIT5K/test_label.txt'
+test_ann_file2 = f'{test_root}/svt/test_label.txt'
+test_ann_file3 = f'{test_root}/icdar_2013/test_label_1015.txt'  # the only set whose label file is not named test_label.txt
+test_ann_file4 = f'{test_root}/icdar_2015/test_label.txt'
+test_ann_file5 = f'{test_root}/svtp/test_label.txt'
+test_ann_file6 = f'{test_root}/ct80/test_label.txt'
+test1 = dict(  # template dataset config; test2..test6 copy it and override the two paths
+    type='OCRDataset',
+    img_prefix=test_img_prefix1,
+    ann_file=test_ann_file1,
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=1,
+        file_format='txt',
+        parser=dict(
+            type='LineStrParser',
+            keys=['filename', 'text'],
+            keys_idx=[0, 1],
+            separator=' ')),
+    pipeline=None,  # left unset here; presumably filled in by the config that includes this file
+    test_mode=True)
+test2 = {key: value for key, value in test1.items()}  # shallow copy: the nested 'loader' dict is shared with test1
+test2['img_prefix'] = test_img_prefix2
+test2['ann_file'] = test_ann_file2
+test3 = {key: value for key, value in test1.items()}  # shallow copy of test1
+test3['img_prefix'] = test_img_prefix3
+test3['ann_file'] = test_ann_file3
+test4 = {key: value for key, value in test1.items()}  # shallow copy of test1
+test4['img_prefix'] = test_img_prefix4
+test4['ann_file'] = test_ann_file4
+test5 = {key: value for key, value in test1.items()}  # shallow copy of test1
+test5['img_prefix'] = test_img_prefix5
+test5['ann_file'] = test_ann_file5
+test6 = {key: value for key, value in test1.items()}  # shallow copy of test1
+test6['img_prefix'] = test_img_prefix6
+test6['ann_file'] = test_ann_file6
+test_list = [test1, test2, test3, test4, test5, test6]

configs/_base_/recog_datasets/seg_toy_data.py ADDED Viewed

	@@ -0,0 +1,34 @@

+prefix = 'tests/data/ocr_char_ann_toy_dataset/'  # ends with '/', so the f-strings below contain a double slash
+train = dict(
+    type='OCRSegDataset',
+    img_prefix=f'{prefix}/imgs',
+    ann_file=f'{prefix}/instances_train.txt',
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=100,
+        file_format='txt',
+        parser=dict(
+            type='LineJsonParser', keys=['file_name', 'annotations', 'text'])),
+    pipeline=None,  # left unset here; presumably filled in by the config that includes this file
+    test_mode=True)  # NOTE(review): the train split is built with test_mode=True; confirm this is intentional
+test = dict(  # uses OCRDataset with a LineStrParser, unlike the OCRSegDataset / LineJsonParser train split
+    type='OCRDataset',
+    img_prefix=f'{prefix}/imgs',
+    ann_file=f'{prefix}/instances_test.txt',
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=1,
+        file_format='txt',
+        parser=dict(
+            type='LineStrParser',
+            keys=['filename', 'text'],
+            keys_idx=[0, 1],
+            separator=' ')),
+    pipeline=None,
+    test_mode=True)
+train_list = [train]
+test_list = [test]

configs/_base_/recog_datasets/toy_data.py ADDED Viewed

	@@ -0,0 +1,54 @@

+dataset_type = 'OCRDataset'  # shared 'type' for all three dataset dicts below
+root = 'tests/data/ocr_toy_dataset'
+img_prefix = f'{root}/imgs'  # all three datasets read images from the same directory
+train_anno_file1 = f'{root}/label.txt'
+train1 = dict(  # txt-annotation variant of the toy training set
+    type=dataset_type,
+    img_prefix=img_prefix,
+    ann_file=train_anno_file1,
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=100,
+        file_format='txt',
+        file_storage_backend='disk',
+        parser=dict(
+            type='LineStrParser',
+            keys=['filename', 'text'],
+            keys_idx=[0, 1],
+            separator=' ')),
+    pipeline=None,  # left unset here; presumably filled in by the config that includes this file
+    test_mode=False)
+train_anno_file2 = f'{root}/label.lmdb'
+train2 = dict(  # lmdb-annotation variant of the toy training set
+    type=dataset_type,
+    img_prefix=img_prefix,
+    ann_file=train_anno_file2,
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=100,
+        file_format='lmdb',
+        file_storage_backend='disk',
+        parser=dict(type='LineJsonParser', keys=['filename', 'text'])),
+    pipeline=None,
+    test_mode=False)
+test_anno_file1 = f'{root}/label.lmdb'  # same path as train_anno_file2
+test = dict(  # same settings as train2 except repeat=1 and test_mode=True
+    type=dataset_type,
+    img_prefix=img_prefix,
+    ann_file=test_anno_file1,
+    loader=dict(
+        type='AnnFileLoader',
+        repeat=1,
+        file_format='lmdb',
+        file_storage_backend='disk',
+        parser=dict(type='LineJsonParser', keys=['filename', 'text'])),
+    pipeline=None,
+    test_mode=True)
+train_list = [train1, train2]
+test_list = [test]

configs/_base_/recog_models/abinet.py ADDED Viewed

	@@ -0,0 +1,70 @@

+# num_chars depends on the configuration of label_convertor. The actual
+# dictionary size is 36 + 1 (<BOS/EOS>).
+# TODO: Automatically update num_chars based on the configuration of
+# label_convertor
+num_chars = 37  # reused for every num_chars / num_classes / pad_idx setting below
+max_seq_len = 26  # reused for every max_seq_len setting below
+label_convertor = dict(
+    type='ABIConvertor',
+    dict_type='DICT36',
+    with_unknown=False,
+    with_padding=False,
+    lower=True,
+)
+model = dict(  # ABINet config: backbone, vision encoder, language decoder, fuser and loss
+    type='ABINet',
+    backbone=dict(type='ResNetABI'),
+    encoder=dict(
+        type='ABIVisionModel',
+        encoder=dict(
+            type='TransformerEncoder',
+            n_layers=3,
+            n_head=8,
+            d_model=512,
+            d_inner=2048,
+            dropout=0.1,
+            max_len=8 * 32,  # 256; equals attn_height * attn_width of the vision decoder below
+        ),
+        decoder=dict(
+            type='ABIVisionDecoder',
+            in_channels=512,
+            num_channels=64,
+            attn_height=8,
+            attn_width=32,
+            attn_mode='nearest',
+            use_result='feature',
+            num_chars=num_chars,
+            max_seq_len=max_seq_len,
+            init_cfg=dict(type='Xavier', layer='Conv2d')),
+    ),
+    decoder=dict(
+        type='ABILanguageDecoder',
+        d_model=512,
+        n_head=8,
+        d_inner=2048,
+        n_layers=4,
+        dropout=0.1,
+        detach_tokens=True,
+        use_self_attn=False,
+        pad_idx=num_chars - 1,  # evaluates to 36
+        num_chars=num_chars,
+        max_seq_len=max_seq_len,
+        init_cfg=None),
+    fuser=dict(
+        type='ABIFuser',
+        d_model=512,
+        num_chars=num_chars,
+        init_cfg=None,
+        max_seq_len=max_seq_len,
+    ),
+    loss=dict(
+        type='ABILoss',
+        enc_weight=1.0,
+        dec_weight=1.0,
+        fusion_weight=1.0,
+        num_classes=num_chars),
+    label_convertor=label_convertor,
+    max_seq_len=max_seq_len,
+    iter_size=3)

configs/_base_/recog_models/crnn.py ADDED Viewed

	@@ -0,0 +1,12 @@

+label_convertor = dict(  # referenced by model['label_convertor'] below
+    type='CTCConvertor', dict_type='DICT36', with_unknown=False, lower=True)
+model = dict(  # CRNN config with no preprocessor and no encoder
+    type='CRNNNet',
+    preprocessor=None,
+    backbone=dict(type='VeryDeepVgg', leaky_relu=False, input_channels=1),
+    encoder=None,
+    decoder=dict(type='CRNNDecoder', in_channels=512, rnn_flag=True),
+    loss=dict(type='CTCLoss'),
+    label_convertor=label_convertor,
+    pretrained=None)

configs/_base_/recog_models/crnn_tps.py ADDED Viewed

	@@ -0,0 +1,18 @@

+# model: CRNN recognizer with a TPS preprocessor in front of the backbone
+label_convertor = dict(  # CTC label-convertor settings, passed into `model` below
+    type='CTCConvertor', dict_type='DICT36', with_unknown=False, lower=True)
+model = dict(
+    type='CRNNNet',
+    preprocessor=dict(  # input and rectified image sizes are both (32, 100)
+        type='TPSPreprocessor',
+        num_fiducial=20,
+        img_size=(32, 100),
+        rectified_img_size=(32, 100),
+        num_img_channel=1),
+    backbone=dict(type='VeryDeepVgg', leaky_relu=False, input_channels=1),  # 1 channel, matching num_img_channel above
+    encoder=None,
+    decoder=dict(type='CRNNDecoder', in_channels=512, rnn_flag=True),
+    loss=dict(type='CTCLoss'),
+    label_convertor=label_convertor,
+    pretrained=None)  # no pretrained weights are referenced here

configs/_base_/recog_models/master.py ADDED Viewed

	@@ -0,0 +1,61 @@

+label_convertor = dict(  # attention-style label convertor, passed into `model` below
+    type='AttnConvertor', dict_type='DICT90', with_unknown=True)
+model = dict(  # MASTER recognizer: ResNet backbone with plugins, no encoder, MasterDecoder
+    type='MASTER',
+    backbone=dict(
+        type='ResNet',
+        in_channels=3,
+        stem_channels=[64, 128],
+        block_cfgs=dict(  # per-block config: BasicBlock with a GCAModule plugin placed after conv2
+            type='BasicBlock',
+            plugins=dict(
+                cfg=dict(
+                    type='GCAModule',
+                    ratio=0.0625,
+                    n_head=1,
+                    pooling_type='att',
+                    is_att_scale=False,
+                    fusion_type='channel_add'),
+                position='after_conv2')),
+        arch_layers=[1, 2, 5, 3],  # four stages
+        arch_channels=[256, 256, 512, 512],
+        strides=[1, 1, 1, 1],
+        plugins=[  # stage-level plugins; each `stages` tuple has one flag per stage
+            dict(
+                cfg=dict(type='Maxpool2d', kernel_size=2, stride=(2, 2)),
+                stages=(True, True, False, False),  # enabled for the first two stages only
+                position='before_stage'),
+            dict(
+                cfg=dict(type='Maxpool2d', kernel_size=(2, 1), stride=(2, 1)),
+                stages=(False, False, True, False),  # enabled for the third stage only
+                position='before_stage'),
+            dict(
+                cfg=dict(
+                    type='ConvModule',
+                    kernel_size=3,
+                    stride=1,
+                    padding=1,
+                    norm_cfg=dict(type='BN'),
+                    act_cfg=dict(type='ReLU')),
+                stages=(True, True, True, True),  # enabled for every stage
+                position='after_stage')
+        ],
+        init_cfg=[
+            dict(type='Kaiming', layer='Conv2d'),
+            dict(type='Constant', val=1, layer='BatchNorm2d'),
+        ]),
+    encoder=None,
+    decoder=dict(
+        type='MasterDecoder',
+        d_model=512,
+        n_head=8,
+        attn_drop=0.,
+        ffn_drop=0.,
+        d_inner=2048,
+        n_layers=3,
+        feat_pe_drop=0.2,
+        feat_size=6 * 40),  # NOTE(review): presumably feature-map height * width - confirm
+    loss=dict(type='TFLoss', reduction='mean'),
+    label_convertor=label_convertor,
+    max_seq_len=30)

configs/_base_/recog_models/nrtr_modality_transform.py ADDED Viewed

	@@ -0,0 +1,11 @@

+label_convertor = dict(  # attention-style label convertor, passed into `model` below
+    type='AttnConvertor', dict_type='DICT36', with_unknown=True, lower=True)
+model = dict(  # NRTR recognizer using the NRTRModalityTransform backbone
+    type='NRTR',
+    backbone=dict(type='NRTRModalityTransform'),
+    encoder=dict(type='NRTREncoder', n_layers=12),
+    decoder=dict(type='NRTRDecoder'),  # no overrides given; decoder defaults apply
+    loss=dict(type='TFLoss'),
+    label_convertor=label_convertor,
+    max_seq_len=40)

configs/_base_/recog_models/robust_scanner.py ADDED Viewed

	@@ -0,0 +1,24 @@

+label_convertor = dict(  # attention-style label convertor, passed into `model` below
+    type='AttnConvertor', dict_type='DICT90', with_unknown=True)
+hybrid_decoder = dict(type='SequenceAttentionDecoder')  # sub-decoder config passed to RobustScannerDecoder below
+position_decoder = dict(type='PositionAttentionDecoder')  # sub-decoder config passed to RobustScannerDecoder below
+model = dict(  # RobustScanner recognizer
+    type='RobustScanner',
+    backbone=dict(type='ResNet31OCR'),
+    encoder=dict(  # channel reduction from 512 to 128
+        type='ChannelReductionEncoder',
+        in_channels=512,
+        out_channels=128,
+    ),
+    decoder=dict(
+        type='RobustScannerDecoder',
+        dim_input=512,
+        dim_model=128,  # same value as the encoder's out_channels
+        hybrid_decoder=hybrid_decoder,
+        position_decoder=position_decoder),
+    loss=dict(type='SARLoss'),
+    label_convertor=label_convertor,
+    max_seq_len=30)

configs/_base_/recog_models/sar.py ADDED Viewed

	@@ -0,0 +1,24 @@

+label_convertor = dict(  # attention-style label convertor, passed into `model` below
+    type='AttnConvertor', dict_type='DICT90', with_unknown=True)
+model = dict(  # SAR recognizer: ResNet31OCR backbone, SAREncoder, ParallelSARDecoder
+    type='SARNet',
+    backbone=dict(type='ResNet31OCR'),
+    encoder=dict(
+        type='SAREncoder',
+        enc_bi_rnn=False,
+        enc_do_rnn=0.1,
+        enc_gru=False,
+    ),
+    decoder=dict(
+        type='ParallelSARDecoder',
+        enc_bi_rnn=False,  # same value as the encoder's enc_bi_rnn above
+        dec_bi_rnn=False,
+        dec_do_rnn=0,
+        dec_gru=False,
+        pred_dropout=0.1,
+        d_k=512,
+        pred_concat=True),
+    loss=dict(type='SARLoss'),
+    label_convertor=label_convertor,
+    max_seq_len=30)

configs/_base_/recog_models/satrn.py ADDED Viewed

	@@ -0,0 +1,11 @@

+label_convertor = dict(  # attention-style label convertor, passed into `model` below
+    type='AttnConvertor', dict_type='DICT36', with_unknown=True, lower=True)
+model = dict(  # SATRN recognizer; every component is given by type only, with no overrides
+    type='SATRN',
+    backbone=dict(type='ShallowCNN'),
+    encoder=dict(type='SatrnEncoder'),
+    decoder=dict(type='TFDecoder'),
+    loss=dict(type='TFLoss'),
+    label_convertor=label_convertor,
+    max_seq_len=40)

configs/_base_/recog_models/seg.py ADDED Viewed

	@@ -0,0 +1,21 @@

+label_convertor = dict(  # segmentation-style label convertor, passed into `model` below
+    type='SegConvertor', dict_type='DICT36', with_unknown=True, lower=True)
+model = dict(  # segmentation-based recognizer: backbone -> neck -> head (no encoder/decoder keys)
+    type='SegRecognizer',
+    backbone=dict(
+        type='ResNet31OCR',
+        layers=[1, 2, 5, 3],
+        channels=[32, 64, 128, 256, 512, 512],
+        out_indices=[0, 1, 2, 3],  # four outputs are requested from the backbone
+        stage4_pool_cfg=dict(kernel_size=2, stride=2),
+        last_stage_pool=True),
+    neck=dict(  # four input channel sizes, one per backbone output index
+        type='FPNOCR', in_channels=[128, 256, 512, 512], out_channels=256),
+    head=dict(
+        type='SegHead',
+        in_channels=256,  # same value as the neck's out_channels
+        upsample_param=dict(scale_factor=2.0, mode='nearest')),
+    loss=dict(
+        type='SegLoss', seg_downsample_ratio=1.0, seg_with_loss_weight=True),
+    label_convertor=label_convertor)

configs/_base_/recog_pipelines/abinet_pipeline.py ADDED Viewed

	@@ -0,0 +1,96 @@

+img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 3-channel mean/std, unpacked into NormalizeOCR in both pipelines
+train_pipeline = [  # training pipeline: load, resize, three optional augmentation groups, tensor + normalize, collect
+    dict(type='LoadImageFromFile'),
+    dict(  # height 32, width pinned to 128 (min_width == max_width), aspect ratio not kept
+        type='ResizeOCR',
+        height=32,
+        min_width=128,
+        max_width=128,
+        keep_aspect_ratio=False,
+        width_downsample_ratio=0.25),
+    dict(  # geometric augmentation group, wrapped with p=0.5
+        type='RandomWrapper',
+        p=0.5,
+        transforms=[
+            dict(
+                type='OneOfWrapper',  # NOTE(review): presumably picks one of the listed transforms per sample - confirm
+                transforms=[
+                    dict(
+                        type='RandomRotateTextDet',
+                        max_angle=15,
+                    ),
+                    dict(
+                        type='TorchVisionWrapper',
+                        op='RandomAffine',
+                        degrees=15,
+                        translate=(0.3, 0.3),
+                        scale=(0.5, 2.),
+                        shear=(-45, 45),
+                    ),
+                    dict(
+                        type='TorchVisionWrapper',
+                        op='RandomPerspective',
+                        distortion_scale=0.5,
+                        p=1,
+                    ),
+                ])
+        ],
+    ),
+    dict(  # rescale / noise / blur group, wrapped with p=0.25
+        type='RandomWrapper',
+        p=0.25,
+        transforms=[
+            dict(type='PyramidRescale'),
+            dict(
+                type='Albu',
+                transforms=[
+                    dict(type='GaussNoise', var_limit=(20, 20), p=0.5),
+                    dict(type='MotionBlur', blur_limit=6, p=0.5),
+                ]),
+        ]),
+    dict(  # color jitter group, wrapped with p=0.25
+        type='RandomWrapper',
+        p=0.25,
+        transforms=[
+            dict(
+                type='TorchVisionWrapper',
+                op='ColorJitter',
+                brightness=0.5,
+                saturation=0.5,
+                contrast=0.5,
+                hue=0.1),
+        ]),
+    dict(type='ToTensorOCR'),
+    dict(type='NormalizeOCR', **img_norm_cfg),
+    dict(  # training meta_keys include 'text'
+        type='Collect',
+        keys=['img'],
+        meta_keys=[
+            'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio',
+            'resize_shape'
+        ]),
+]
+test_pipeline = [  # test pipeline: load, then resize/normalize/collect inside a multi-rotation wrapper
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiRotateAugOCR',
+        rotate_degrees=[0, 90, 270],  # rotation angles handed to the wrapper
+        transforms=[
+            dict(  # same ResizeOCR settings as the training pipeline in this file
+                type='ResizeOCR',
+                height=32,
+                min_width=128,
+                max_width=128,
+                keep_aspect_ratio=False,
+                width_downsample_ratio=0.25),
+            dict(type='ToTensorOCR'),
+            dict(type='NormalizeOCR', **img_norm_cfg),  # img_norm_cfg is defined at the top of this file
+            dict(  # test meta_keys omit 'text' and add 'img_norm_cfg' and 'ori_filename'
+                type='Collect',
+                keys=['img'],
+                meta_keys=[
+                    'filename', 'ori_shape', 'img_shape', 'valid_ratio',
+                    'resize_shape', 'img_norm_cfg', 'ori_filename'
+                ]),
+        ])
+]

configs/_base_/recog_pipelines/crnn_pipeline.py ADDED Viewed

	@@ -0,0 +1,35 @@

+img_norm_cfg = dict(mean=[127], std=[127])  # single-channel mean/std, unpacked into Normalize in both pipelines
+train_pipeline = [  # training pipeline: grayscale load, fixed-size resize, normalize, format, collect
+    dict(type='LoadImageFromFile', color_type='grayscale'),
+    dict(  # height 32, width pinned to 100 (min_width == max_width), aspect ratio not kept
+        type='ResizeOCR',
+        height=32,
+        min_width=100,
+        max_width=100,
+        keep_aspect_ratio=False),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='DefaultFormatBundle'),
+    dict(  # training meta_keys include 'text'
+        type='Collect',
+        keys=['img'],
+        meta_keys=['filename', 'resize_shape', 'text', 'valid_ratio']),
+]
+test_pipeline = [  # test pipeline: grayscale load, aspect-preserving resize, normalize, format, collect
+    dict(type='LoadImageFromFile', color_type='grayscale'),
+    dict(  # unlike training: aspect ratio is kept, min_width is 32 and max_width is None
+        type='ResizeOCR',
+        height=32,
+        min_width=32,
+        max_width=None,
+        keep_aspect_ratio=True),
+    dict(type='Normalize', **img_norm_cfg),  # img_norm_cfg is defined at the top of this file
+    dict(type='DefaultFormatBundle'),
+    dict(  # test meta_keys omit 'text' and add normalization / original-file information
+        type='Collect',
+        keys=['img'],
+        meta_keys=[
+            'filename', 'resize_shape', 'valid_ratio', 'img_norm_cfg',
+            'ori_filename', 'img_shape', 'ori_shape'
+        ]),
+]

configs/_base_/recog_pipelines/crnn_tps_pipeline.py ADDED Viewed

	@@ -0,0 +1,37 @@

+img_norm_cfg = dict(mean=[0.5], std=[0.5])  # single-channel mean/std, unpacked into NormalizeOCR in both pipelines
+train_pipeline = [  # training pipeline: grayscale load, fixed-size resize, tensor + normalize, collect
+    dict(type='LoadImageFromFile', color_type='grayscale'),
+    dict(  # height 32, width pinned to 100 (min_width == max_width), aspect ratio not kept
+        type='ResizeOCR',
+        height=32,
+        min_width=100,
+        max_width=100,
+        keep_aspect_ratio=False),
+    dict(type='ToTensorOCR'),
+    dict(type='NormalizeOCR', **img_norm_cfg),
+    dict(  # training meta_keys include 'text'
+        type='Collect',
+        keys=['img'],
+        meta_keys=[
+            'filename', 'ori_shape', 'resize_shape', 'text', 'valid_ratio'
+        ]),
+]
+test_pipeline = [  # test pipeline: grayscale load, resize, tensor + normalize, collect
+    dict(type='LoadImageFromFile', color_type='grayscale'),
+    dict(  # differs from training only in min_width (32 here, 100 in training)
+        type='ResizeOCR',
+        height=32,
+        min_width=32,
+        max_width=100,
+        keep_aspect_ratio=False),
+    dict(type='ToTensorOCR'),
+    dict(type='NormalizeOCR', **img_norm_cfg),  # img_norm_cfg is defined at the top of this file
+    dict(  # test meta_keys omit 'text' and add 'img_norm_cfg', 'ori_filename', 'img_shape'
+        type='Collect',
+        keys=['img'],
+        meta_keys=[
+            'filename', 'ori_shape', 'resize_shape', 'valid_ratio',
+            'img_norm_cfg', 'ori_filename', 'img_shape'
+        ]),
+]

configs/_base_/recog_pipelines/master_pipeline.py ADDED Viewed

	@@ -0,0 +1,42 @@

+img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # 3-channel mean/std, unpacked into NormalizeOCR in both pipelines
+train_pipeline = [  # training pipeline: load, aspect-preserving resize, tensor + normalize, collect
+    dict(type='LoadImageFromFile'),
+    dict(  # height 48, width between 48 and 160, aspect ratio kept
+        type='ResizeOCR',
+        height=48,
+        min_width=48,
+        max_width=160,
+        keep_aspect_ratio=True),
+    dict(type='ToTensorOCR'),
+    dict(type='NormalizeOCR', **img_norm_cfg),
+    dict(  # training meta_keys include 'text'
+        type='Collect',
+        keys=['img'],
+        meta_keys=[
+            'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio',
+            'resize_shape'
+        ]),
+]
+test_pipeline = [  # test pipeline: load, then resize/normalize/collect inside a multi-rotation wrapper
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiRotateAugOCR',
+        rotate_degrees=[0, 90, 270],  # rotation angles handed to the wrapper
+        transforms=[
+            dict(  # same ResizeOCR settings as the training pipeline in this file
+                type='ResizeOCR',
+                height=48,
+                min_width=48,
+                max_width=160,
+                keep_aspect_ratio=True),
+            dict(type='ToTensorOCR'),
+            dict(type='NormalizeOCR', **img_norm_cfg),  # img_norm_cfg is defined at the top of this file
+            dict(  # test meta_keys omit 'text' and add 'img_norm_cfg' and 'ori_filename'
+                type='Collect',
+                keys=['img'],
+                meta_keys=[
+                    'filename', 'ori_shape', 'img_shape', 'valid_ratio',
+                    'img_norm_cfg', 'ori_filename', 'resize_shape'
+                ]),
+        ])
+]

configs/_base_/recog_pipelines/nrtr_pipeline.py ADDED Viewed

	@@ -0,0 +1,38 @@

+img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 3-channel mean/std, unpacked into NormalizeOCR in both pipelines
+train_pipeline = [  # training pipeline: load, aspect-preserving resize, tensor + normalize, collect
+    dict(type='LoadImageFromFile'),
+    dict(  # height 32, width between 32 and 160, aspect ratio kept
+        type='ResizeOCR',
+        height=32,
+        min_width=32,
+        max_width=160,
+        keep_aspect_ratio=True,
+        width_downsample_ratio=0.25),  # set here only; the test pipeline's ResizeOCR does not pass it
+    dict(type='ToTensorOCR'),
+    dict(type='NormalizeOCR', **img_norm_cfg),
+    dict(  # training meta_keys include 'text'
+        type='Collect',
+        keys=['img'],
+        meta_keys=[
+            'filename', 'ori_shape', 'resize_shape', 'text', 'valid_ratio'
+        ]),
+]
+test_pipeline = [  # test pipeline: load, aspect-preserving resize, tensor + normalize, collect
+    dict(type='LoadImageFromFile'),
+    dict(  # NOTE(review): training passes width_downsample_ratio=0.25 but this entry does not - confirm intended
+        type='ResizeOCR',
+        height=32,
+        min_width=32,
+        max_width=160,
+        keep_aspect_ratio=True),
+    dict(type='ToTensorOCR'),
+    dict(type='NormalizeOCR', **img_norm_cfg),  # img_norm_cfg is defined at the top of this file
+    dict(  # test meta_keys omit 'text' and add 'img_norm_cfg', 'ori_filename', 'img_shape'
+        type='Collect',
+        keys=['img'],
+        meta_keys=[
+            'filename', 'ori_shape', 'resize_shape', 'valid_ratio',
+            'img_norm_cfg', 'ori_filename', 'img_shape'
+        ])
+]