# Copyright (c) OpenMMLab. All rights reserved.
"""pytest tests/test_forward.py."""
import copy
from os.path import dirname, exists, join

import numpy as np
import pytest
import torch


def _get_config_directory():
    """Find the predefined detector config directory."""
    try:
        # Assume we are running in the source mmdetection repo
        repo_dpath = dirname(dirname(dirname(__file__)))
    except NameError:
        # For IPython development when this __file__ is not defined
        import mmdet
        repo_dpath = dirname(dirname(mmdet.__file__))
    config_dpath = join(repo_dpath, 'configs')
    if not exists(config_dpath):
        raise Exception('Cannot find config path')
    return config_dpath


def _get_config_module(fname):
    """Load a configuration as a python module."""
    from mmcv import Config
    config_dpath = _get_config_directory()
    config_fpath = join(config_dpath, fname)
    config_mod = Config.fromfile(config_fpath)
    return config_mod


def _get_detector_cfg(fname):
    """Grab configs necessary to create a detector.

    These are deep copied to allow for safe modification of parameters without
    influencing other tests.
    """
    config = _get_config_module(fname)
    model = copy.deepcopy(config.model)
    return model


def _replace_r50_with_r18(model):
    """Replace ResNet50 with ResNet18 in config."""
    model = copy.deepcopy(model)
    if model.backbone.type == 'ResNet':
        model.backbone.depth = 18
        # Shrink channel widths to keep the tests cheap: a 2-channel stem
        # yields backbone outputs of [2, 4, 8, 16] channels for the neck.
        model.backbone.base_channels = 2
        model.neck.in_channels = [2, 4, 8, 16]
    return model
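

# The three helpers above compose into the recipe every test below follows:
# load a config, shrink the backbone, build the detector. A minimal sketch of
# that recipe (illustrative only; the helper name and the default config path
# are examples, not part of the test API):
def _demo_build_small_detector(
        cfg_path='retinanet/retinanet_r50_fpn_1x_coco.py'):
    """Illustrative sketch, not a test: build a small detector from a config.

    Any ResNet50-based config under ``configs/`` should work here.
    """
    from mmdet.models import build_detector
    model = _get_detector_cfg(cfg_path)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None  # skip pretrained weight loading
    return build_detector(model)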


def test_sparse_rcnn_forward():
    config_path = 'sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py'
    model = _get_detector_cfg(config_path)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None
    from mmdet.models import build_detector
    detector = build_detector(model)
    detector.init_weights()
    input_shape = (1, 3, 100, 100)
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[5])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Test forward train with non-empty truth batch
    detector.train()
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_bboxes = [item for item in gt_bboxes]
    gt_labels = mm_inputs['gt_labels']
    gt_labels = [item for item in gt_labels]
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0
    detector.forward_dummy(imgs)

    # Test forward train with an empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_bboxes = [item for item in gt_bboxes]
    gt_labels = mm_inputs['gt_labels']
    gt_labels = [item for item in gt_labels]
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0

    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      rescale=True,
                                      return_loss=False)
            batch_results.append(result)

    # test empty proposal in roi_head
    with torch.no_grad():
        # test no proposal in the whole batch
        detector.roi_head.simple_test([imgs[0][None, :]],
                                      torch.empty((1, 0, 4)),
                                      torch.empty((1, 100, 4)),
                                      [img_metas[0]], torch.ones((1, 4)))


def test_rpn_forward():
    model = _get_detector_cfg('rpn/rpn_r50_fpn_1x_coco.py')
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None
    from mmdet.models import build_detector
    detector = build_detector(model)

    input_shape = (1, 3, 100, 100)
    mm_inputs = _demo_mm_inputs(input_shape)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Test forward train
    gt_bboxes = mm_inputs['gt_bboxes']
    losses = detector.forward(
        imgs, img_metas, gt_bboxes=gt_bboxes, return_loss=True)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      return_loss=False)
            batch_results.append(result)


@pytest.mark.parametrize(
    'cfg_file',
    [
        'reppoints/reppoints_moment_r50_fpn_1x_coco.py',
        'retinanet/retinanet_r50_fpn_1x_coco.py',
        'guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py',
        'ghm/retinanet_ghm_r50_fpn_1x_coco.py',
        'fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py',
        'foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py',
        # 'free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py',
        # 'atss/atss_r50_fpn_1x_coco.py',  # not ready for topk
        'yolo/yolov3_mobilenetv2_320_300e_coco.py',
        'yolox/yolox_tiny_8x8_300e_coco.py'
    ])
def test_single_stage_forward_gpu(cfg_file):
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')

    model = _get_detector_cfg(cfg_file)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None

    from mmdet.models import build_detector
    detector = build_detector(model)

    input_shape = (2, 3, 128, 128)
    mm_inputs = _demo_mm_inputs(input_shape)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    detector = detector.cuda()
    imgs = imgs.cuda()
    # Test forward train
    gt_bboxes = [b.cuda() for b in mm_inputs['gt_bboxes']]
    gt_labels = [g.cuda() for g in mm_inputs['gt_labels']]
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)

    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      return_loss=False)
            batch_results.append(result)
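

# The test-time loop above recurs throughout this module. The detector's
# ``forward(..., return_loss=False)`` entry point expects a list of augmented
# image batches and a matching list of meta lists, hence the ``[one_img]`` /
# ``[[one_meta]]`` nesting for the single-image, no-augmentation case. A
# minimal sketch of that convention (the helper name is an example only):
def _demo_single_image_test(detector, imgs, img_metas):
    """Illustrative sketch, not a test: run test-time forward image by image,
    as the tests in this module do."""
    detector.eval()
    batch_results = []
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      return_loss=False)
            batch_results.append(result)
    return batch_results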


def test_faster_rcnn_ohem_forward():
    model = _get_detector_cfg(
        'faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py')
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None

    from mmdet.models import build_detector
    detector = build_detector(model)

    input_shape = (1, 3, 100, 100)

    # Test forward train with a non-empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[10])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0

    # Test forward train with an empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0

    # Test RoI forward train with empty proposals
    feature = detector.extract_feat(imgs[0][None, :])
    losses = detector.roi_head.forward_train(
        feature,
        img_metas, [torch.empty((0, 5))],
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels)
    assert isinstance(losses, dict)


@pytest.mark.parametrize(
    'cfg_file',
    [
        # 'cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',
        'mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py',
        # 'grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py',
        # 'ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py',
        # 'htc/htc_r50_fpn_1x_coco.py',
        # 'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py',
        # 'scnet/scnet_r50_fpn_20e_coco.py',
        # 'seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py'  # noqa: E501
    ])
def test_two_stage_forward(cfg_file):
    models_with_semantic = [
        'htc/htc_r50_fpn_1x_coco.py',
        'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py',
        'scnet/scnet_r50_fpn_20e_coco.py',
    ]
    if cfg_file in models_with_semantic:
        with_semantic = True
    else:
        with_semantic = False

    model = _get_detector_cfg(cfg_file)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None

    # Save cost
    if cfg_file in [
            'seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py'  # noqa: E501
    ]:
        model.roi_head.bbox_head.num_classes = 80
        model.roi_head.bbox_head.loss_cls.num_classes = 80
        model.roi_head.mask_head.num_classes = 80
        model.test_cfg.rcnn.score_thr = 0.05
        model.test_cfg.rcnn.max_per_img = 100

    from mmdet.models import build_detector
    detector = build_detector(model)

    input_shape = (1, 3, 128, 128)

    # Test forward train with a non-empty truth batch
    mm_inputs = _demo_mm_inputs(
        input_shape, num_items=[10], with_semantic=with_semantic)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    losses = detector.forward(imgs, img_metas, return_loss=True, **mm_inputs)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    loss.requires_grad_(True)
    assert float(loss.item()) > 0
    loss.backward()

    # Test forward train with an empty truth batch
    mm_inputs = _demo_mm_inputs(
        input_shape, num_items=[0], with_semantic=with_semantic)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    losses = detector.forward(imgs, img_metas, return_loss=True, **mm_inputs)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    loss.requires_grad_(True)
    assert float(loss.item()) > 0
    loss.backward()

    # Test RoI forward train with empty proposals
    if cfg_file in [
            'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py'  # noqa: E501
    ]:
        mm_inputs.pop('gt_semantic_seg')
    feature = detector.extract_feat(imgs[0][None, :])
    losses = detector.roi_head.forward_train(feature, img_metas,
                                             [torch.empty((0, 5))],
                                             **mm_inputs)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      return_loss=False)
            batch_results.append(result)

    cascade_models = [
        'cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',
        'htc/htc_r50_fpn_1x_coco.py',
        'scnet/scnet_r50_fpn_20e_coco.py',
    ]
    # test empty proposal in roi_head
    with torch.no_grad():
        # test no proposal in the whole batch
        detector.simple_test(
            imgs[0][None, :], [img_metas[0]], proposals=[torch.empty((0, 4))])

        # test no proposal of aug
        features = detector.extract_feats([imgs[0][None, :]] * 2)
        detector.roi_head.aug_test(features, [torch.empty((0, 4))] * 2,
                                   [[img_metas[0]]] * 2)

        # test rcnn_test_cfg is None
        if cfg_file not in cascade_models:
            feature = detector.extract_feat(imgs[0][None, :])
            bboxes, scores = detector.roi_head.simple_test_bboxes(
                feature, [img_metas[0]], [torch.empty((0, 4))], None)
            assert all([bbox.shape == torch.Size((0, 4)) for bbox in bboxes])
            assert all([
                score.shape == torch.Size(
                    (0, detector.roi_head.bbox_head.fc_cls.out_features))
                for score in scores
            ])

        # test no proposal in some images of the batch
        x1y1 = torch.randint(1, 100, (10, 2)).float()
        # x2y2 must be greater than x1y1
        x2y2 = x1y1 + torch.randint(1, 100, (10, 2))
        detector.simple_test(
            imgs[0][None, :].repeat(2, 1, 1, 1), [img_metas[0]] * 2,
            proposals=[torch.empty((0, 4)),
                       torch.cat([x1y1, x2y2], dim=-1)])

        # test no proposal of aug
        detector.roi_head.aug_test(
            features, [torch.cat([x1y1, x2y2], dim=-1),
                       torch.empty((0, 4))], [[img_metas[0]]] * 2)

        # test rcnn_test_cfg is None
        if cfg_file not in cascade_models:
            feature = detector.extract_feat(imgs[0][None, :].repeat(
                2, 1, 1, 1))
            bboxes, scores = detector.roi_head.simple_test_bboxes(
                feature, [img_metas[0]] * 2,
                [torch.empty((0, 4)),
                 torch.cat([x1y1, x2y2], dim=-1)], None)
            assert bboxes[0].shape == torch.Size((0, 4))
            assert scores[0].shape == torch.Size(
                (0, detector.roi_head.bbox_head.fc_cls.out_features))
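

# The empty-proposal checks above rely on two proposal layouts: test-time
# proposals are (n, 4) ``(x1, y1, x2, y2)`` boxes, while proposals fed to
# ``roi_head.forward_train`` carry a fifth score column, so the empty tensor
# there is (0, 5). A minimal sketch of how such proposals are built (the
# helper name and sizes are examples only):
def _demo_proposals(num_boxes=10, max_coord=100):
    """Illustrative sketch, not a test: empty and valid proposal tensors."""
    x1y1 = torch.randint(1, max_coord, (num_boxes, 2)).float()
    # x2y2 must be greater than x1y1 for the boxes to be valid
    x2y2 = x1y1 + torch.randint(1, max_coord, (num_boxes, 2))
    valid = torch.cat([x1y1, x2y2], dim=-1)
    return torch.empty((0, 4)), torch.empty((0, 5)), valid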


@pytest.mark.parametrize(
    'cfg_file',
    ['ghm/retinanet_ghm_r50_fpn_1x_coco.py', 'ssd/ssd300_coco.py'])
def test_single_stage_forward_cpu(cfg_file):
    model = _get_detector_cfg(cfg_file)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None

    from mmdet.models import build_detector
    detector = build_detector(model)

    input_shape = (1, 3, 300, 300)
    mm_inputs = _demo_mm_inputs(input_shape)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Test forward train
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)

    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      return_loss=False)
            batch_results.append(result)


def _demo_mm_inputs(input_shape=(1, 3, 300, 300),
                    num_items=None, num_classes=10,
                    with_semantic=False):  # yapf: disable
    """Create a superset of inputs needed to run test or train batches.

    Args:
        input_shape (tuple): input batch dimensions
        num_items (None | List[int]): specifies the number of boxes in each
            batch item
        num_classes (int): number of different labels a box might have
        with_semantic (bool): whether to include semantic segmentation maps
    """
    from mmdet.core import BitmapMasks

    (N, C, H, W) = input_shape

    rng = np.random.RandomState(0)

    imgs = rng.rand(*input_shape)

    img_metas = [{
        'img_shape': (H, W, C),
        'ori_shape': (H, W, C),
        'pad_shape': (H, W, C),
        'filename': '<demo>.png',
        'scale_factor': np.array([1.1, 1.2, 1.1, 1.2]),
        'flip': False,
        'flip_direction': None,
    } for _ in range(N)]

    gt_bboxes = []
    gt_labels = []
    gt_masks = []

    for batch_idx in range(N):
        if num_items is None:
            num_boxes = rng.randint(1, 10)
        else:
            num_boxes = num_items[batch_idx]

        cx, cy, bw, bh = rng.rand(num_boxes, 4).T

        tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)
        tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)
        br_x = ((cx * W) + (W * bw / 2)).clip(0, W)
        br_y = ((cy * H) + (H * bh / 2)).clip(0, H)

        boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T
        class_idxs = rng.randint(1, num_classes, size=num_boxes)

        gt_bboxes.append(torch.FloatTensor(boxes))
        gt_labels.append(torch.LongTensor(class_idxs))
        mask = np.random.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)
        gt_masks.append(BitmapMasks(mask, H, W))

    mm_inputs = {
        'imgs': torch.FloatTensor(imgs).requires_grad_(True),
        'img_metas': img_metas,
        'gt_bboxes': gt_bboxes,
        'gt_labels': gt_labels,
        'gt_bboxes_ignore': None,
        'gt_masks': gt_masks,
    }

    if with_semantic:
        # assume gt_semantic_seg using scale 1/8 of the img
        gt_semantic_seg = np.random.randint(
            0, num_classes, (1, 1, H // 8, W // 8), dtype=np.uint8)
        mm_inputs.update(
            {'gt_semantic_seg': torch.ByteTensor(gt_semantic_seg)})

    return mm_inputs
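

# A minimal sketch of what ``_demo_mm_inputs`` produces (illustrative only;
# the helper name and all shapes are example values):
def _demo_mm_inputs_example():
    """Illustrative sketch, not a test: inspect the synthetic batch."""
    mm_inputs = _demo_mm_inputs((2, 3, 64, 64), num_items=[3, 0])
    assert mm_inputs['imgs'].shape == (2, 3, 64, 64)
    assert mm_inputs['gt_bboxes'][0].shape == (3, 4)  # (x1, y1, x2, y2) rows
    assert mm_inputs['gt_labels'][1].shape == (0, )  # empty truth is allowed
    return mm_inputs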


def test_yolact_forward():
    model = _get_detector_cfg('yolact/yolact_r50_1x8_coco.py')
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None

    from mmdet.models import build_detector
    detector = build_detector(model)

    input_shape = (1, 3, 100, 100)
    mm_inputs = _demo_mm_inputs(input_shape)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Test forward train
    detector.train()
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    gt_masks = mm_inputs['gt_masks']
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        gt_masks=gt_masks,
        return_loss=True)
    assert isinstance(losses, dict)

    # Test forward dummy for get_flops
    detector.forward_dummy(imgs)

    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      rescale=True,
                                      return_loss=False)
            batch_results.append(result)


def test_detr_forward():
    model = _get_detector_cfg('detr/detr_r50_8x2_150e_coco.py')
    model.backbone.depth = 18
    model.bbox_head.in_channels = 512
    model.backbone.init_cfg = None

    from mmdet.models import build_detector
    detector = build_detector(model)

    input_shape = (1, 3, 100, 100)
    mm_inputs = _demo_mm_inputs(input_shape)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Test forward train with non-empty truth batch
    detector.train()
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0

    # Test forward train with an empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0

    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      rescale=True,
                                      return_loss=False)
            batch_results.append(result)


def test_inference_detector():
    from mmcv import ConfigDict

    from mmdet.apis import inference_detector
    from mmdet.models import build_detector

    # small RetinaNet
    num_class = 3
    model_dict = dict(
        type='RetinaNet',
        backbone=dict(
            type='ResNet',
            depth=18,
            num_stages=4,
            out_indices=(3, ),
            norm_cfg=dict(type='BN', requires_grad=False),
            norm_eval=True,
            style='pytorch'),
        neck=None,
        bbox_head=dict(
            type='RetinaHead',
            num_classes=num_class,
            in_channels=512,
            stacked_convs=1,
            feat_channels=256,
            anchor_generator=dict(
                type='AnchorGenerator',
                octave_base_scale=4,
                scales_per_octave=3,
                ratios=[0.5],
                strides=[32]),
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[.0, .0, .0, .0],
                target_stds=[1.0, 1.0, 1.0, 1.0]),
        ),
        test_cfg=dict(
            nms_pre=1000,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100))

    rng = np.random.RandomState(0)
    img1 = rng.rand(100, 100, 3)
    img2 = rng.rand(100, 100, 3)

    model = build_detector(ConfigDict(model_dict))
    config = _get_config_module('retinanet/retinanet_r50_fpn_1x_coco.py')
    model.cfg = config

    # test single image
    result = inference_detector(model, img1)
    assert len(result) == num_class

    # test multiple images
    result = inference_detector(model, [img1, img2])
    assert len(result) == 2 and len(result[0]) == num_class


def test_yolox_random_size():
    from mmdet.models import build_detector
    model = _get_detector_cfg('yolox/yolox_tiny_8x8_300e_coco.py')
    model.random_size_range = (2, 2)
    model.input_size = (64, 96)
    model.random_size_interval = 1

    detector = build_detector(model)
    input_shape = (1, 3, 64, 64)
    mm_inputs = _demo_mm_inputs(input_shape)

    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Test forward train with non-empty truth batch
    detector.train()
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert detector._input_size == (64, 96)


def test_maskformer_forward():
    model_cfg = _get_detector_cfg(
        'maskformer/maskformer_r50_mstrain_16x1_75e_coco.py')
    base_channels = 32
    model_cfg.backbone.depth = 18
    model_cfg.backbone.init_cfg = None
    model_cfg.backbone.base_channels = base_channels
    model_cfg.panoptic_head.in_channels = [
        base_channels * 2**i for i in range(4)
    ]
    model_cfg.panoptic_head.feat_channels = base_channels
    model_cfg.panoptic_head.out_channels = base_channels
    model_cfg.panoptic_head.pixel_decoder.encoder.\
        transformerlayers.attn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.pixel_decoder.encoder.\
        transformerlayers.ffn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.pixel_decoder.encoder.\
        transformerlayers.ffn_cfgs.feedforward_channels = base_channels * 8
    model_cfg.panoptic_head.pixel_decoder.\
        positional_encoding.num_feats = base_channels // 2
    model_cfg.panoptic_head.positional_encoding.\
        num_feats = base_channels // 2
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.attn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.ffn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.ffn_cfgs.feedforward_channels = base_channels * 8
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.feedforward_channels = base_channels * 8

    from mmdet.core import BitmapMasks
    from mmdet.models import build_detector
    detector = build_detector(model_cfg)

    # Test forward train with non-empty truth batch
    detector.train()
    img_metas = [
        {
            'batch_input_shape': (128, 160),
            'img_shape': (126, 160, 3),
            'ori_shape': (63, 80, 3),
            'pad_shape': (128, 160, 3)
        },
    ]
    img = torch.rand((1, 3, 128, 160))
    gt_bboxes = None
    gt_labels = [
        torch.tensor([10]).long(),
    ]
    thing_mask1 = np.zeros((1, 128, 160), dtype=np.int32)
    thing_mask1[0, :50] = 1
    gt_masks = [
        BitmapMasks(thing_mask1, 128, 160),
    ]
    stuff_mask1 = torch.zeros((1, 128, 160)).long()
    stuff_mask1[0, :50] = 10
    stuff_mask1[0, 50:] = 100
    gt_semantic_seg = [
        stuff_mask1,
    ]
    losses = detector.forward(
        img=img,
        img_metas=img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        gt_masks=gt_masks,
        gt_semantic_seg=gt_semantic_seg,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0

    # Test forward train with an empty truth batch
    gt_bboxes = [
        torch.empty((0, 4)).float(),
    ]
    gt_labels = [
        torch.empty((0, )).long(),
    ]
    mask = np.zeros((0, 128, 160), dtype=np.uint8)
    gt_masks = [
        BitmapMasks(mask, 128, 160),
    ]
    gt_semantic_seg = [
        torch.randint(0, 133, (0, 128, 160)),
    ]
    losses = detector.forward(
        img,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        gt_masks=gt_masks,
        gt_semantic_seg=gt_semantic_seg,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0

    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in img]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      rescale=True,
                                      return_loss=False)
            batch_results.append(result)
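

# ``test_mask2former_forward`` below reuses the same single-image panoptic
# ground truth as the MaskFormer test above: one thing instance plus a stuff
# segmentation map whose pixel values are class ids. A minimal sketch of that
# construction (the helper name is an example; 10 and 100 are arbitrary class
# ids within the 133-class COCO panoptic label space):
def _demo_panoptic_gt(height=128, width=160):
    """Illustrative sketch, not a test: panoptic-style ground truth."""
    from mmdet.core import BitmapMasks
    gt_labels = [torch.tensor([10]).long()]  # one thing instance, class 10
    thing_mask = np.zeros((1, height, width), dtype=np.int32)
    thing_mask[0, :50] = 1  # the instance occupies the top rows
    gt_masks = [BitmapMasks(thing_mask, height, width)]
    stuff_mask = torch.zeros((1, height, width)).long()
    stuff_mask[0, :50] = 10  # pixel values are class ids
    stuff_mask[0, 50:] = 100
    gt_semantic_seg = [stuff_mask]
    return gt_labels, gt_masks, gt_semantic_seg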


@pytest.mark.parametrize('cfg_file', [
    'mask2former/mask2former_r50_lsj_8x2_50e_coco.py',
    'mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py'
])
def test_mask2former_forward(cfg_file):
    # Test Panoptic Segmentation and Instance Segmentation
    model_cfg = _get_detector_cfg(cfg_file)
    base_channels = 32
    model_cfg.backbone.depth = 18
    model_cfg.backbone.init_cfg = None
    model_cfg.backbone.base_channels = base_channels
    model_cfg.panoptic_head.in_channels = [
        base_channels * 2**i for i in range(4)
    ]
    model_cfg.panoptic_head.feat_channels = base_channels
    model_cfg.panoptic_head.out_channels = base_channels
    model_cfg.panoptic_head.pixel_decoder.encoder.\
        transformerlayers.attn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.pixel_decoder.encoder.\
        transformerlayers.ffn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.pixel_decoder.encoder.\
        transformerlayers.ffn_cfgs.feedforward_channels = base_channels * 4
    model_cfg.panoptic_head.pixel_decoder.\
        positional_encoding.num_feats = base_channels // 2
    model_cfg.panoptic_head.positional_encoding.\
        num_feats = base_channels // 2
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.attn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.ffn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.ffn_cfgs.feedforward_channels = base_channels * 8
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.feedforward_channels = base_channels * 8
    num_stuff_classes = model_cfg.panoptic_head.num_stuff_classes

    from mmdet.core import BitmapMasks
    from mmdet.models import build_detector
    detector = build_detector(model_cfg)

    def _forward_train():
        losses = detector.forward(
            img,
            img_metas,
            gt_bboxes=gt_bboxes,
            gt_labels=gt_labels,
            gt_masks=gt_masks,
            gt_semantic_seg=gt_semantic_seg,
            return_loss=True)
        assert isinstance(losses, dict)
        loss, _ = detector._parse_losses(losses)
        assert float(loss.item()) > 0

    # Test forward train with non-empty truth batch
    detector.train()
    img_metas = [
        {
            'batch_input_shape': (128, 160),
            'img_shape': (126, 160, 3),
            'ori_shape': (63, 80, 3),
            'pad_shape': (128, 160, 3)
        },
    ]
    img = torch.rand((1, 3, 128, 160))
    gt_bboxes = None
    gt_labels = [
        torch.tensor([10]).long(),
    ]
    thing_mask1 = np.zeros((1, 128, 160), dtype=np.int32)
    thing_mask1[0, :50] = 1
    gt_masks = [
        BitmapMasks(thing_mask1, 128, 160),
    ]
    stuff_mask1 = torch.zeros((1, 128, 160)).long()
    stuff_mask1[0, :50] = 10
    stuff_mask1[0, 50:] = 100
    gt_semantic_seg = [
        stuff_mask1,
    ]
    _forward_train()

    # Test forward train with non-empty truth batch and gt_semantic_seg=None
    gt_semantic_seg = None
    _forward_train()

    # Test forward train with an empty truth batch
    gt_bboxes = [
        torch.empty((0, 4)).float(),
    ]
    gt_labels = [
        torch.empty((0, )).long(),
    ]
    mask = np.zeros((0, 128, 160), dtype=np.uint8)
    gt_masks = [
        BitmapMasks(mask, 128, 160),
    ]
    gt_semantic_seg = [
        torch.randint(0, 133, (0, 128, 160)),
    ]
    _forward_train()

    # Test forward train with an empty truth batch and gt_semantic_seg=None
    gt_semantic_seg = None
    _forward_train()

    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in img]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      rescale=True,
                                      return_loss=False)
            # panoptic models return a dict per image; instance-only models
            # return a tuple
            if num_stuff_classes > 0:
                assert isinstance(result[0], dict)
            else:
                assert isinstance(result[0], tuple)
            batch_results.append(result)