# Page header residue ("Spaces: Sleeping"); the config dump starts below.
# Scope name for this config; the `type` strings below are looked up by name.
default_scope = 'mmdet'
# Runtime hooks: the logger uses interval=100; the checkpoint hook uses
# interval=1, keeps at most 5 checkpoints and tracks the best one ('auto').
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=100),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(
        type='CheckpointHook', interval=1, max_keep_ckpts=5, save_best='auto'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'))
# Environment settings: cuDNN benchmark off, 'fork' multiprocessing start
# method, OpenCV threads set to 0, and the 'nccl' distributed backend.
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))
# Visualization backend list and the visualizer configured with the same
# backend; `save_dir` is the current directory.
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer',
    vis_backends=[dict(type='LocalVisBackend')],
    name='visualizer',
    save_dir='./')
# Logging configuration.
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
log_level = 'INFO'
# Checkpoint to start from; `resume = True` is set together with it.
# NOTE(review): this is an absolute path on the training machine.
load_from = '/home/erik/Riksarkivet/Projects/HTR_Pipeline/models/checkpoints/rtmdet_lines_pr_2/epoch_11.pth'
resume = True
# Epoch-based training loop: 12 epochs, val_interval=12.
# `dynamic_intervals=[(10, 1)]` presumably switches the validation interval
# to 1 from epoch 10 onwards — confirm against the loop implementation.
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=12,
    val_interval=12,
    dynamic_intervals=[(10, 1)])
val_cfg = dict(type='ValLoop')
# Test loop configuration.
# NOTE(review): besides the loop `type`, this dict carries a `pipeline` key
# whose value is identical to `test_pipeline`; confirm that whatever builds
# `test_cfg` accepts this extra key.
test_cfg = dict(
    type='TestLoop',
    pipeline=[
        dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
        dict(type='Resize', scale=(640, 640), keep_ratio=True),
        dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
        dict(
            type='PackDetInputs',
            meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                       'scale_factor'))
    ])
# Learning-rate schedule in two parts:
#   1. LinearLR, iteration-based (by_epoch=False), from iteration 0 to 1000,
#      starting at factor 1e-05.
#   2. CosineAnnealingLR, epoch-based, from epoch 6 to 12 (T_max=6), with
#      eta_min=1.25e-05, converted to iteration-based stepping.
param_scheduler = [
    dict(
        type='LinearLR', start_factor=1e-05, by_epoch=False, begin=0,
        end=1000),
    dict(
        type='CosineAnnealingLR',
        eta_min=1.25e-05,
        begin=6,
        end=12,
        T_max=6,
        by_epoch=True,
        convert_to_iter_based=True)
]
# Optimizer: AdamW with lr=0.00025 and weight_decay=0.05; the decay
# multiplier is 0 for norm and bias parameters.
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.00025, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# Automatic LR scaling is disabled; the reference batch size is 16.
auto_scale_lr = dict(enable=False, base_batch_size=16)
# Generic dataset defaults. The dataset dicts in this file spell out their
# own absolute paths instead of using `data_root`.
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
file_client_args = dict(backend='disk')
# Training pipeline: load image and bbox/mask annotations, then
# CachedMosaic, random resize, random crop to 640x640, HSV augmentation,
# random flip, pad to 640x640 and CachedMixUp, before filtering small boxes
# and packing the inputs.
train_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
    dict(
        type='RandomResize',
        scale=(1280, 1280),
        ratio_range=(0.1, 2.0),
        keep_ratio=True),
    dict(
        type='RandomCrop',
        crop_size=(640, 640),
        recompute_bbox=True,
        allow_negative_crop=True),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(
        type='CachedMixUp',
        img_scale=(640, 640),
        ratio_range=(1.0, 1.0),
        max_cached_images=20,
        pad_val=(114, 114, 114)),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
    dict(type='PackDetInputs')
]
# Test pipeline: load, aspect-preserving resize to 640x640, pad to 640x640
# with value 114, then pack the inputs with the listed meta keys.
test_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
# Test-time augmentation: model wrapper with NMS (IoU 0.6) and at most 100
# detections per image.
tta_model = dict(
    type='DetTTAModel',
    tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.6), max_per_img=100))
# The three scales below match the Resize entries of `tta_pipeline`.
img_scales = [(640, 640), (320, 320), (960, 960)]
# TTA pipeline: `transforms` lists the alternatives per stage — three
# resizes, flip / no flip, one pad to 960x960, and one packing step.
tta_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
    dict(
        type='TestTimeAug',
        transforms=[[{
            'type': 'Resize',
            'scale': (640, 640),
            'keep_ratio': True
        }, {
            'type': 'Resize',
            'scale': (320, 320),
            'keep_ratio': True
        }, {
            'type': 'Resize',
            'scale': (960, 960),
            'keep_ratio': True
        }],
                    [{
                        'type': 'RandomFlip',
                        'prob': 1.0
                    }, {
                        'type': 'RandomFlip',
                        'prob': 0.0
                    }],
                    [{
                        'type': 'Pad',
                        'size': (960, 960),
                        'pad_val': {
                            'img': (114, 114, 114)
                        }
                    }],
                    [{
                        'type':
                        'PackDetInputs',
                        'meta_keys':
                        ('img_id', 'img_path', 'ori_shape', 'img_shape',
                         'scale_factor', 'flip', 'flip_direction')
                    }]])
]
# RTMDet detector with an instance-segmentation head
# (CSPNeXt backbone -> CSPNeXtPAFPN neck -> RTMDetInsSepBNHead).
model = dict(
    type='RTMDet',
    # Per-channel normalisation; no BGR->RGB swap and no batch augmentation.
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[103.53, 116.28, 123.675],
        std=[57.375, 57.12, 58.395],
        bgr_to_rgb=False,
        batch_augments=None),
    backbone=dict(
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=0.67,
        widen_factor=0.75,
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU', inplace=True)),
    neck=dict(
        type='CSPNeXtPAFPN',
        in_channels=[192, 384, 768],
        out_channels=192,
        num_csp_blocks=2,
        expand_ratio=0.5,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU', inplace=True)),
    bbox_head=dict(
        type='RTMDetInsSepBNHead',
        # NOTE(review): the head is configured for 80 classes, while the
        # datasets in this file declare a single class and the top-level
        # `num_classes` is 1. Left unchanged because the checkpoint in
        # `load_from` presumably has matching weight shapes — confirm
        # before editing.
        num_classes=80,
        in_channels=192,
        stacked_convs=2,
        share_conv=True,
        pred_kernel_size=1,
        feat_channels=192,
        act_cfg=dict(type='SiLU', inplace=True),
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        anchor_generator=dict(
            type='MlvlPointGenerator', offset=0, strides=[8, 16, 32]),
        bbox_coder=dict(type='DistancePointBBoxCoder'),
        loss_cls=dict(
            type='QualityFocalLoss',
            use_sigmoid=True,
            beta=2.0,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_mask=dict(
            type='DiceLoss', loss_weight=2.0, eps=5e-06, reduction='mean')),
    train_cfg=dict(
        assigner=dict(type='DynamicSoftLabelAssigner', topk=13),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    # Inference: score threshold 0.4, NMS IoU 0.6, at most 50 detections per
    # image, masks binarised at 0.5.
    test_cfg=dict(
        nms_pre=400,
        min_bbox_size=0,
        score_thr=0.4,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=50,
        mask_thr_binary=0.5))
# Second-stage training pipeline: the same steps as `train_pipeline` minus
# CachedMosaic and CachedMixUp, with RandomResize at scale (640, 640).
train_pipeline_stage2 = [
    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(
        type='RandomResize',
        scale=(640, 640),
        ratio_range=(0.1, 2.0),
        keep_ratio=True),
    dict(
        type='RandomCrop',
        crop_size=(640, 640),
        recompute_bbox=True,
        allow_negative_crop=True),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(type='PackDetInputs')
]
# Training dataloader: batch size 2, one worker, shuffled sampling over a
# ConcatDataset containing the police-records line annotations.
train_dataloader = dict(
    batch_size=2,
    num_workers=1,
    batch_sampler=None,
    pin_memory=True,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='ConcatDataset',
        datasets=[
            dict(
                type='CocoDataset',
                # `classes` is a one-element tuple: a bare string would be
                # treated as a sequence of characters by code indexing it.
                metainfo=dict(
                    classes=('text_line', ), palette=[(220, 20, 60)]),
                data_prefix=dict(
                    img=
                    '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
                ),
                ann_file=
                '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json',
                pipeline=[
                    dict(
                        type='LoadImageFromFile',
                        file_client_args=dict(backend='disk')),
                    dict(
                        type='LoadAnnotations',
                        with_bbox=True,
                        with_mask=True,
                        poly2mask=False),
                    dict(
                        type='CachedMosaic',
                        img_scale=(640, 640),
                        pad_val=114.0),
                    dict(
                        type='RandomResize',
                        scale=(1280, 1280),
                        ratio_range=(0.1, 2.0),
                        keep_ratio=True),
                    dict(
                        type='RandomCrop',
                        crop_size=(640, 640),
                        recompute_bbox=True,
                        allow_negative_crop=True),
                    dict(type='YOLOXHSVRandomAug'),
                    dict(type='RandomFlip', prob=0.5),
                    dict(
                        type='Pad',
                        size=(640, 640),
                        pad_val=dict(img=(114, 114, 114))),
                    dict(
                        type='CachedMixUp',
                        img_scale=(640, 640),
                        ratio_range=(1.0, 1.0),
                        max_cached_images=20,
                        pad_val=(114, 114, 114)),
                    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
                    dict(type='PackDetInputs')
                ])
        ]))
# Validation dataloader: batch size 1, 10 workers, unshuffled, test mode.
# NOTE(review): `ann_file` here is coco_regions2.json, whereas
# `val_evaluator` and the training dataset use coco_lines2.json — confirm
# which annotation file is intended.
val_dataloader = dict(
    batch_size=1,
    num_workers=10,
    dataset=dict(
        pipeline=[
            dict(
                type='LoadImageFromFile',
                file_client_args=dict(backend='disk')),
            dict(type='Resize', scale=(640, 640), keep_ratio=True),
            dict(
                type='Pad', size=(640, 640),
                pad_val=dict(img=(114, 114, 114))),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'))
        ],
        type='CocoDataset',
        # `classes` is a one-element tuple: a bare string would be treated
        # as a sequence of characters by code indexing it.
        metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
        data_prefix=dict(
            img=
            '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
        ),
        ann_file=
        '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_regions2.json',
        test_mode=True),
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False))
# Test dataloader: batch size 1, 10 workers, unshuffled, test mode.
# NOTE(review): `ann_file` here is coco_regions2.json, whereas
# `test_evaluator` and the training dataset use coco_lines2.json — confirm
# which annotation file is intended.
test_dataloader = dict(
    batch_size=1,
    num_workers=10,
    dataset=dict(
        pipeline=[
            dict(
                type='LoadImageFromFile',
                file_client_args=dict(backend='disk')),
            dict(type='Resize', scale=(640, 640), keep_ratio=True),
            dict(
                type='Pad', size=(640, 640),
                pad_val=dict(img=(114, 114, 114))),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'))
        ],
        type='CocoDataset',
        # `classes` is a one-element tuple: a bare string would be treated
        # as a sequence of characters by code indexing it.
        metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
        data_prefix=dict(
            img=
            '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
        ),
        ann_file=
        '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_regions2.json',
        test_mode=True),
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False))
# Scalar training settings kept as plain variables in the dump.
max_epochs = 12
stage2_num_epochs = 2
base_lr = 0.00025
interval = 12
# COCO-style evaluation of boxes and masks against the line annotations.
# NOTE(review): the val/test dataloaders in this file point at
# coco_regions2.json while these evaluators use coco_lines2.json — confirm
# the two are meant to differ.
val_evaluator = dict(
    proposal_nums=(100, 1, 10),
    metric=['bbox', 'segm'],
    type='CocoMetric',
    ann_file=
    '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json'
)
test_evaluator = dict(
    proposal_nums=(100, 1, 10),
    metric=['bbox', 'segm'],
    type='CocoMetric',
    ann_file=
    '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json'
)
# Extra hooks: an EMA hook (priority 49) and a PipelineSwitchHook with
# switch_epoch=10 whose `switch_pipeline` has the same steps as
# `train_pipeline_stage2`.
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49),
    dict(
        type='PipelineSwitchHook',
        switch_epoch=10,
        switch_pipeline=[
            dict(
                type='LoadImageFromFile',
                file_client_args=dict(backend='disk')),
            dict(
                type='LoadAnnotations',
                with_bbox=True,
                with_mask=True,
                poly2mask=False),
            dict(
                type='RandomResize',
                scale=(640, 640),
                ratio_range=(0.1, 2.0),
                keep_ratio=True),
            dict(
                type='RandomCrop',
                crop_size=(640, 640),
                recompute_bbox=True,
                allow_negative_crop=True),
            dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
            dict(type='YOLOXHSVRandomAug'),
            dict(type='RandomFlip', prob=0.5),
            dict(
                type='Pad', size=(640, 640),
                pad_val=dict(img=(114, 114, 114))),
            dict(type='PackDetInputs')
        ])
]
# Output directory and per-run scalars.
# NOTE(review): `work_dir` is an absolute path on the training machine.
work_dir = '/home/erik/Riksarkivet/Projects/HTR_Pipeline/models/checkpoints/rtmdet_lines_pr_2'
train_batch_size_per_gpu = 2
val_batch_size_per_gpu = 1
train_num_workers = 1
num_classes = 1
# Single-class dataset meta. `classes` is a one-element tuple: a bare string
# would be treated as a sequence of characters by code indexing it.
metainfo = dict(classes=('text_line', ), palette=[(220, 20, 60)])
# ICDAR-2019 training dataset definition (region annotations, full training
# augmentation pipeline).
icdar_2019 = dict(
    type='CocoDataset',
    # `classes` is a one-element tuple: a bare string would be treated as a
    # sequence of characters by code indexing it.
    metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
    data_prefix=dict(
        img=
        '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
    ),
    ann_file=
    '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_regions2.json',
    pipeline=[
        dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
        dict(
            type='LoadAnnotations',
            with_bbox=True,
            with_mask=True,
            poly2mask=False),
        dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
        dict(
            type='RandomResize',
            scale=(1280, 1280),
            ratio_range=(0.1, 2.0),
            keep_ratio=True),
        dict(
            type='RandomCrop',
            crop_size=(640, 640),
            recompute_bbox=True,
            allow_negative_crop=True),
        dict(type='YOLOXHSVRandomAug'),
        dict(type='RandomFlip', prob=0.5),
        dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
        dict(
            type='CachedMixUp',
            img_scale=(640, 640),
            ratio_range=(1.0, 1.0),
            max_cached_images=20,
            pad_val=(114, 114, 114)),
        dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
        dict(type='PackDetInputs')
    ])
# ICDAR-2019 test dataset definition (line annotations, test mode, plain
# resize/pad pipeline).
icdar_2019_test = dict(
    type='CocoDataset',
    # `classes` is a one-element tuple: a bare string would be treated as a
    # sequence of characters by code indexing it.
    metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
    data_prefix=dict(
        img=
        '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
    ),
    ann_file=
    '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_lines.json',
    test_mode=True,
    pipeline=[
        dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
        dict(type='Resize', scale=(640, 640), keep_ratio=True),
        dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
        dict(
            type='PackDetInputs',
            meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                       'scale_factor'))
    ])
# Police-records training dataset definition (line annotations, full
# training augmentation pipeline).
police_records = dict(
    type='CocoDataset',
    # `classes` is a one-element tuple: a bare string would be treated as a
    # sequence of characters by code indexing it.
    metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
    data_prefix=dict(
        img=
        '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
    ),
    ann_file=
    '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json',
    pipeline=[
        dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
        dict(
            type='LoadAnnotations',
            with_bbox=True,
            with_mask=True,
            poly2mask=False),
        dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
        dict(
            type='RandomResize',
            scale=(1280, 1280),
            ratio_range=(0.1, 2.0),
            keep_ratio=True),
        dict(
            type='RandomCrop',
            crop_size=(640, 640),
            recompute_bbox=True,
            allow_negative_crop=True),
        dict(type='YOLOXHSVRandomAug'),
        dict(type='RandomFlip', prob=0.5),
        dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
        dict(
            type='CachedMixUp',
            img_scale=(640, 640),
            ratio_range=(1.0, 1.0),
            max_cached_images=20,
            pad_val=(114, 114, 114)),
        dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
        dict(type='PackDetInputs')
    ])
# Datasets used for training: a single entry with the same content as
# `police_records`.
train_list = [
    dict(
        type='CocoDataset',
        # `classes` is a one-element tuple: a bare string would be treated
        # as a sequence of characters by code indexing it.
        metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
        data_prefix=dict(
            img=
            '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
        ),
        ann_file=
        '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json',
        pipeline=[
            dict(
                type='LoadImageFromFile',
                file_client_args=dict(backend='disk')),
            dict(
                type='LoadAnnotations',
                with_bbox=True,
                with_mask=True,
                poly2mask=False),
            dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
            dict(
                type='RandomResize',
                scale=(1280, 1280),
                ratio_range=(0.1, 2.0),
                keep_ratio=True),
            dict(
                type='RandomCrop',
                crop_size=(640, 640),
                recompute_bbox=True,
                allow_negative_crop=True),
            dict(type='YOLOXHSVRandomAug'),
            dict(type='RandomFlip', prob=0.5),
            dict(
                type='Pad', size=(640, 640),
                pad_val=dict(img=(114, 114, 114))),
            dict(
                type='CachedMixUp',
                img_scale=(640, 640),
                ratio_range=(1.0, 1.0),
                max_cached_images=20,
                pad_val=(114, 114, 114)),
            dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
            dict(type='PackDetInputs')
        ])
]
# Datasets used for testing: a single entry with the same content as
# `icdar_2019_test`.
test_list = [
    dict(
        type='CocoDataset',
        # `classes` is a one-element tuple: a bare string would be treated
        # as a sequence of characters by code indexing it.
        metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
        data_prefix=dict(
            img=
            '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
        ),
        ann_file=
        '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_lines.json',
        test_mode=True,
        pipeline=[
            dict(
                type='LoadImageFromFile',
                file_client_args=dict(backend='disk')),
            dict(type='Resize', scale=(640, 640), keep_ratio=True),
            dict(
                type='Pad', size=(640, 640),
                pad_val=dict(img=(114, 114, 114))),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'))
        ])
]
# Stand-alone copy of the test-time pipeline (same steps as `test_pipeline`).
pipeline = [
    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
# Launcher name recorded for the run.
launcher = 'pytorch'