# Detector training config written in the MMEngine config style. The `model`
# section of this file wires together YOLOv8 components; everything not set
# here is inherited from the base file listed in `_base_`.
_base_ = ['./default_runtime.py']

# ------------------------------ dataset paths -------------------------------
# Handed to the datasets as `data_root`; the evaluator builds its annotation
# path as `data_root + val_ann_file`.
data_root = 'data/coco/'
train_ann_file = 'annotations/instances_train2017.json'
train_data_prefix = 'train2017/'
val_ann_file = 'annotations/instances_val2017.json'
val_data_prefix = 'val2017/'

# Forwarded to the head module and to the label assigner.
# NOTE(review): the annotation file names look like the stock COCO instance
# files while only one class is configured -- confirm the annotations really
# contain exactly one category.
num_classes = 1

# ------------------------------ training loader -----------------------------
train_batch_size_per_gpu = 16
train_num_workers = 8
# Shared by the training and the validation dataloader.
persistent_workers = True

# ------------------------------ schedule ------------------------------------
base_lr = 0.01
max_epochs = 500
# The pipeline switch hook and the validation-interval change both trigger at
# epoch `max_epochs - close_mosaic_epochs`.
close_mosaic_epochs = 10

# Post-processing settings; assigned to `model['test_cfg']` in this file.
# The per-key meanings below are presumed from the key names -- confirm
# against the head implementation that consumes them.
model_test_cfg = dict(
    multi_label=True,
    # Presumably the number of candidates kept before NMS.
    nms_pre=30000,
    # Presumably the minimum score for a candidate to be kept.
    score_thr=0.001,
    nms=dict(type='nms', iou_threshold=0.7),
    # Presumably the cap on detections returned per image.
    max_per_img=300)

# Target image size used by Mosaic, the resize transforms and the affine
# border computation in the pipelines of this file.
img_scale = (640, 640)

# Dataset class name used for both the training and validation datasets.
dataset_type = 'YOLOv5CocoDataset'

# ------------------------------ validation loader ---------------------------
val_batch_size_per_gpu = 1
val_num_workers = 2

# Passed to the validation dataset as `batch_shapes_cfg`; None leaves it unset.
batch_shapes_cfg = None

# ------------------------------ model scaling -------------------------------
# Both factors are passed to the backbone and the neck; `widen_factor` is also
# passed to the head module.
deepen_factor = 0.33
widen_factor = 0.5

# Used as `featmap_strides` of the head module and as the prior generator's
# strides.
strides = [8, 16, 32]

# Last entry of the channel lists given to the backbone, neck and head.
last_stage_out_channels = 1024
# NOTE(review): not referenced anywhere in the visible part of this config.
num_det_layers = 3
# Normalisation settings shared by the backbone, neck and head module.
norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)

# ------------------------------ augmentation --------------------------------
# YOLOv5RandomAffine scales within (1 - affine_scale, 1 + affine_scale).
affine_scale = 0.5
max_aspect_ratio = 100

# ------------------------------ label assignment ----------------------------
# Passed to BatchTaskAlignedAssigner as `topk`, `alpha` and `beta`.
tal_topk = 10
tal_alpha = 0.5
tal_beta = 6.0

# ------------------------------ loss weights --------------------------------
loss_cls_weight = 0.5
loss_bbox_weight = 7.5
# Evaluates to 0.375.
loss_dfl_weight = 1.5 / 4

# ------------------------------ optimisation --------------------------------
# `lr_factor` goes to YOLOv5ParamSchedulerHook, `weight_decay` to SGD.
lr_factor = 0.01
weight_decay = 0.0005

# ------------------------------ checkpoints / validation --------------------
# Used both as the checkpoint interval and as the regular validation interval.
save_epoch_intervals = 10
# Validation interval that applies from epoch
# `max_epochs - close_mosaic_epochs` onwards (see `train_cfg`).
val_interval_stage2 = 1
max_keep_ckpts = 2

env_cfg = dict(cudnn_benchmark=True)

# Full detector definition: data preprocessor -> backbone -> neck -> head,
# plus the training-time assigner and the test-time post-processing config.
model = dict(
    type='YOLODetector',
    data_preprocessor=dict(
        type='YOLOv5DetDataPreprocessor',
        # Presumably applied as (img - mean) / std, i.e. pixel values scaled
        # to [0, 1] -- confirm against the preprocessor implementation.
        mean=[0., 0., 0.],
        std=[255., 255., 255.],
        bgr_to_rgb=True),
    backbone=dict(
        type='YOLOv8CSPDarknet',
        arch='P5',
        last_stage_out_channels=last_stage_out_channels,
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='SiLU', inplace=True)),
    neck=dict(
        type='YOLOv8PAFPN',
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        in_channels=[256, 512, last_stage_out_channels],
        out_channels=[256, 512, last_stage_out_channels],
        num_csp_blocks=3,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='SiLU', inplace=True)),
    bbox_head=dict(
        type='YOLOv8Head',
        head_module=dict(
            type='YOLOv8HeadModule',
            num_classes=num_classes,
            # Same channel list as the neck's `out_channels`.
            in_channels=[256, 512, last_stage_out_channels],
            widen_factor=widen_factor,
            reg_max=16,
            norm_cfg=norm_cfg,
            act_cfg=dict(type='SiLU', inplace=True),
            featmap_strides=strides),
        prior_generator=dict(
            type='mmdet.MlvlPointGenerator', offset=0.5, strides=strides),
        bbox_coder=dict(type='DistancePointBBoxCoder'),
        loss_cls=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            reduction='none',
            loss_weight=loss_cls_weight),
        loss_bbox=dict(
            type='IoULoss',
            iou_mode='ciou',
            bbox_format='xyxy',
            reduction='sum',
            loss_weight=loss_bbox_weight,
            return_iou=False),
        loss_dfl=dict(
            type='mmdet.DistributionFocalLoss',
            reduction='mean',
            loss_weight=loss_dfl_weight)),
    train_cfg=dict(
        assigner=dict(
            type='BatchTaskAlignedAssigner',
            num_classes=num_classes,
            use_ciou=True,
            topk=tal_topk,
            alpha=tal_alpha,
            beta=tal_beta,
            eps=1e-9)),
    test_cfg=model_test_cfg)

# Transform specs handed to the `mmdet.Albu` wrapper in `last_transform`;
# each one is configured with probability p=0.01.
albu_train_transforms = [
    dict(type='Blur', p=0.01),
    dict(type='MedianBlur', p=0.01),
    dict(type='ToGray', p=0.01),
    dict(type='CLAHE', p=0.01)
]

# Loading steps shared by both training pipelines; also passed to Mosaic as
# its own `pre_transform`. `backend_args` is read from the inherited base
# config.
pre_transform = [
    dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
    dict(type='LoadAnnotations', with_bbox=True)
]

# Tail shared by both training pipelines: Albu augmentations, HSV jitter,
# random flip, then packing into the detector's input format.
last_transform = [
    dict(
        type='mmdet.Albu',
        transforms=albu_train_transforms,
        bbox_params=dict(
            type='BboxParams',
            format='pascal_voc',
            label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
        # Maps this pipeline's result keys onto the names the Albu wrapper
        # works with.
        keymap={
            'img': 'image',
            'gt_bboxes': 'bboxes'
        }),
    dict(type='YOLOv5HSVRandomAug'),
    dict(type='mmdet.RandomFlip', prob=0.5),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
                   'flip_direction'))
]

# Main training pipeline: load -> Mosaic -> random affine -> shared tail.
train_pipeline = [
    *pre_transform,
    dict(
        type='Mosaic',
        img_scale=img_scale,
        pad_val=114.0,
        pre_transform=pre_transform),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
        max_aspect_ratio=max_aspect_ratio,
        # Negative half of each `img_scale` entry.
        # NOTE(review): presumably this trims the enlarged Mosaic canvas back
        # to `img_scale` -- confirm against YOLOv5RandomAffine.
        border=(-img_scale[0] // 2, -img_scale[1] // 2),
        border_val=(114, 114, 114)),
    *last_transform
]

# Second-stage training pipeline (no Mosaic): load -> keep-ratio resize ->
# letterbox -> random affine -> shared tail. It is installed by the
# PipelineSwitchHook configured in `custom_hooks`.
train_pipeline_stage2 = [
    *pre_transform,
    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
    dict(
        type='LetterResize',
        scale=img_scale,
        allow_scale_up=True,
        pad_val=dict(img=114.0)),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
        max_aspect_ratio=max_aspect_ratio,
        border_val=(114, 114, 114)),
    *last_transform
]

# Training dataloader: shuffled sampling over the training split, using the
# Mosaic-based `train_pipeline`.
train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    persistent_workers=persistent_workers,
    pin_memory=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    collate_fn=dict(type='yolov5_collate'),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=train_ann_file,
        data_prefix=dict(img=train_data_prefix),
        # NOTE(review): `filter_empty_gt=False` presumably keeps images
        # without annotations -- confirm against the dataset class.
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        pipeline=train_pipeline))

# Validation/test pipeline: load -> keep-ratio resize -> letterbox (no
# upscaling) -> load annotations -> pack. `scale_factor` and `pad_param` are
# included in the packed meta keys.
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
    dict(
        type='LetterResize',
        scale=img_scale,
        allow_scale_up=False,
        pad_val=dict(img=114)),
    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'pad_param'))
]

# Validation dataloader: unshuffled, keeps the last partial batch, dataset in
# test mode.
val_dataloader = dict(
    batch_size=val_batch_size_per_gpu,
    num_workers=val_num_workers,
    persistent_workers=persistent_workers,
    pin_memory=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        test_mode=True,
        data_prefix=dict(img=val_data_prefix),
        ann_file=val_ann_file,
        pipeline=test_pipeline,
        batch_shapes_cfg=batch_shapes_cfg))

# Testing reuses the very same dict object as validation.
test_dataloader = val_dataloader

# No scheduler is configured here; a YOLOv5ParamSchedulerHook is configured
# under `default_hooks` in this file.
param_scheduler = None

# SGD with Nesterov momentum and gradient clipping, built through a custom
# optimizer constructor.
optim_wrapper = dict(
    type='OptimWrapper',
    clip_grad=dict(max_norm=10.0),
    optimizer=dict(
        type='SGD',
        lr=base_lr,
        momentum=0.937,
        weight_decay=weight_decay,
        nesterov=True,
        # NOTE(review): not a standard SGD argument; presumably consumed by
        # the constructor named below -- confirm.
        batch_size_per_gpu=train_batch_size_per_gpu),
    constructor='YOLOv5OptimizerConstructor')

# Settings for the `param_scheduler` and `checkpoint` default hooks.
default_hooks = dict(
    param_scheduler=dict(
        type='YOLOv5ParamSchedulerHook',
        scheduler_type='linear',
        lr_factor=lr_factor,
        max_epochs=max_epochs),
    checkpoint=dict(
        type='CheckpointHook',
        interval=save_epoch_intervals,
        save_best='auto',
        max_keep_ckpts=max_keep_ckpts))

# Extra hooks: EMA of the model weights, and a pipeline switch to
# `train_pipeline_stage2` at epoch `max_epochs - close_mosaic_epochs`.
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        strict_load=False,
        priority=49),
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - close_mosaic_epochs,
        switch_pipeline=train_pipeline_stage2)
]

# Bounding-box evaluation with CocoMetric against the validation annotation
# file (path built by plain string concatenation, so `data_root` must end
# with a path separator).
val_evaluator = dict(
    type='mmdet.CocoMetric',
    proposal_nums=(100, 1, 10),
    ann_file=data_root + val_ann_file,
    metric='bbox')

# Testing reuses the very same evaluator dict object.
test_evaluator = val_evaluator

# Epoch-based training loop. Validation runs every `save_epoch_intervals`
# epochs; from epoch `max_epochs - close_mosaic_epochs` the interval becomes
# `val_interval_stage2`.
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
    val_interval=save_epoch_intervals,
    dynamic_intervals=[((max_epochs - close_mosaic_epochs),
                        val_interval_stage2)])

val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')