# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmcv import ConfigDict
from mmdet.models.dense_heads import DETRHead


def test_detr_head_loss():
"""Tests transformer head loss when truth is empty and non-empty."""
s = 256
img_metas = [{
'img_shape': (s, s, 3),
'scale_factor': 1,
'pad_shape': (s, s, 3),
'batch_input_shape': (s, s)
}]
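    # Minimal DETR head config: a 6-layer encoder/decoder transformer,
    # sine positional encoding, and the standard DETR losses
    # (cross-entropy, L1 and GIoU).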
    config = ConfigDict(
        dict(
            type='DETRHead',
            num_classes=80,
            in_channels=200,
            transformer=dict(
                type='Transformer',
                encoder=dict(
                    type='DetrTransformerEncoder',
                    num_layers=6,
                    transformerlayers=dict(
                        type='BaseTransformerLayer',
                        attn_cfgs=[
                            dict(
                                type='MultiheadAttention',
                                embed_dims=256,
                                num_heads=8,
                                dropout=0.1)
                        ],
                        feedforward_channels=2048,
                        ffn_dropout=0.1,
                        operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
                decoder=dict(
                    type='DetrTransformerDecoder',
                    return_intermediate=True,
                    num_layers=6,
                    transformerlayers=dict(
                        type='DetrTransformerDecoderLayer',
                        attn_cfgs=dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.1),
                        feedforward_channels=2048,
                        ffn_dropout=0.1,
                        operation_order=('self_attn', 'norm', 'cross_attn',
                                         'norm', 'ffn', 'norm')),
                )),
            positional_encoding=dict(
                type='SinePositionalEncoding', num_feats=128, normalize=True),
            loss_cls=dict(
                type='CrossEntropyLoss',
                bg_cls_weight=0.1,
                use_sigmoid=False,
                loss_weight=1.0,
                class_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=5.0),
            loss_iou=dict(type='GIoULoss', loss_weight=2.0)))
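    # Build the head from the config and run one forward pass on a single
    # random 10x10 feature map with 200 input channels.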
    self = DETRHead(**config)
    self.init_weights()
    feat = [torch.rand(1, 200, 10, 10)]
    cls_scores, bbox_preds = self.forward(feat, img_metas)

    # Test that empty ground truth encourages the network to predict
    # background.
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    gt_bboxes_ignore = None
    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                                img_metas, gt_bboxes_ignore)
    # When there is no truth, the cls loss should be nonzero but there should
    # be no box loss.
    for key, loss in empty_gt_losses.items():
        if 'cls' in key:
            assert loss.item() > 0, 'cls loss should be non-zero'
        elif 'bbox' in key:
            assert loss.item(
            ) == 0, 'there should be no box loss when there are no true boxes'
        elif 'iou' in key:
            assert loss.item(
            ) == 0, 'there should be no iou loss when there are no true boxes'

    # When truth is non-empty, both cls and box loss should be nonzero for
    # random inputs.
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                              img_metas, gt_bboxes_ignore)
    for loss in one_gt_losses.values():
        assert loss.item(
        ) > 0, 'cls loss, or box loss, or iou loss should be non-zero'

    # test forward_train
    self.forward_train(feat, img_metas, gt_bboxes, gt_labels)
    # test inference mode
    self.get_bboxes(cls_scores, bbox_preds, img_metas, rescale=True)
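

if __name__ == '__main__':
    # Optional entry point so the test can be run as a plain script;
    # normally it is collected and executed by pytest.
    test_detr_head_loss()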