# Copyright (c) Facebook, Inc. and its affiliates.
from detectron2.config import CfgNode as CN
def add_detic_config(cfg):
    _C = cfg
    _C.WITH_IMAGE_LABELS = False # Turn on co-training with classification data

    # Open-vocabulary classifier
    _C.MODEL.ROI_BOX_HEAD.USE_ZEROSHOT_CLS = False # Use fixed classifier for open-vocabulary detection
    _C.MODEL.ROI_BOX_HEAD.ZEROSHOT_WEIGHT_PATH = 'datasets/metadata/lvis_v1_clip_a+cname.npy'
    _C.MODEL.ROI_BOX_HEAD.ZEROSHOT_WEIGHT_DIM = 512
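    # Note (assumed semantics, added for reference): ZEROSHOT_WEIGHT_PATH is
    # expected to point at a .npy array of class-name text embeddings with
    # ZEROSHOT_WEIGHT_DIM features per class; the default file above holds
    # CLIP embeddings of the LVIS v1 category names.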
    _C.MODEL.ROI_BOX_HEAD.NORM_WEIGHT = True
    _C.MODEL.ROI_BOX_HEAD.NORM_TEMP = 50.0
    _C.MODEL.ROI_BOX_HEAD.IGNORE_ZERO_CATS = False
    _C.MODEL.ROI_BOX_HEAD.USE_BIAS = 0.0 # >= 0: do not use a bias term
    _C.MODEL.ROI_BOX_HEAD.MULT_PROPOSAL_SCORE = False # CenterNet2
    _C.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE = False
    _C.MODEL.ROI_BOX_HEAD.PRIOR_PROB = 0.01
    _C.MODEL.ROI_BOX_HEAD.USE_FED_LOSS = False # Federated Loss
    _C.MODEL.ROI_BOX_HEAD.CAT_FREQ_PATH = \
        'datasets/metadata/lvis_v1_train_cat_info.json'
    _C.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CAT = 50
    _C.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT = 0.5

    # Classification data configs
    _C.MODEL.ROI_BOX_HEAD.IMAGE_LABEL_LOSS = 'max_size' # max, softmax, sum
    _C.MODEL.ROI_BOX_HEAD.IMAGE_LOSS_WEIGHT = 0.1
    _C.MODEL.ROI_BOX_HEAD.IMAGE_BOX_SIZE = 1.0
    _C.MODEL.ROI_BOX_HEAD.ADD_IMAGE_BOX = False # Used for image-box loss and caption loss
    _C.MODEL.ROI_BOX_HEAD.WS_NUM_PROPS = 128 # Number of proposals for image-labeled data
    _C.MODEL.ROI_BOX_HEAD.WITH_SOFTMAX_PROP = False # Used for WSDDN
    _C.MODEL.ROI_BOX_HEAD.CAPTION_WEIGHT = 1.0 # Caption loss weight
    _C.MODEL.ROI_BOX_HEAD.NEG_CAP_WEIGHT = 0.125 # Caption loss hyper-parameter
    _C.MODEL.ROI_BOX_HEAD.ADD_FEATURE_TO_PROP = False # Used for WSDDN
    _C.MODEL.ROI_BOX_HEAD.SOFTMAX_WEAK_LOSS = False # Used when USE_SIGMOID_CE is False
    _C.MODEL.ROI_HEADS.MASK_WEIGHT = 1.0
    _C.MODEL.ROI_HEADS.ONE_CLASS_PER_PROPOSAL = False # For demo only

    # Caption losses
    _C.MODEL.CAP_BATCH_RATIO = 4 # Ratio between detection data and caption data
    _C.MODEL.WITH_CAPTION = False
    _C.MODEL.SYNC_CAPTION_BATCH = False # Synchronize caption batches across GPUs to enlarge the number of "classes"

    # Dynamic class sampling when training with 21K classes
    _C.MODEL.DYNAMIC_CLASSIFIER = False
    _C.MODEL.NUM_SAMPLE_CATS = 50

    # Different classifiers in testing, used in cross-dataset evaluation
    _C.MODEL.RESET_CLS_TESTS = False
    _C.MODEL.TEST_CLASSIFIERS = []
    _C.MODEL.TEST_NUM_CLASSES = []
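    # Sketch of assumed usage (hypothetical paths and class counts): when
    # RESET_CLS_TESTS is True, TEST_CLASSIFIERS is expected to hold one
    # classifier-embedding file per test dataset and TEST_NUM_CLASSES the
    # matching number of classes, e.g.
    #   cfg.MODEL.RESET_CLS_TESTS = True
    #   cfg.MODEL.TEST_CLASSIFIERS = ['datasets/metadata/datasetA_clip.npy',
    #                                 'datasets/metadata/datasetB_clip.npy']
    #   cfg.MODEL.TEST_NUM_CLASSES = [80, 500]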

    # Backbones
    _C.MODEL.SWIN = CN()
    _C.MODEL.SWIN.SIZE = 'T' # 'T', 'S', 'B'
    _C.MODEL.SWIN.USE_CHECKPOINT = False
    _C.MODEL.SWIN.OUT_FEATURES = (1, 2, 3) # FPN strides 8 - 32

    _C.MODEL.TIMM = CN()
    _C.MODEL.TIMM.BASE_NAME = 'resnet50'
    _C.MODEL.TIMM.OUT_LEVELS = (3, 4, 5)
    _C.MODEL.TIMM.NORM = 'FrozenBN'
    _C.MODEL.TIMM.FREEZE_AT = 0
    _C.MODEL.DATASET_LOSS_WEIGHT = []

    # Multi-dataset dataloader
    _C.DATALOADER.DATASET_RATIO = [1, 1] # Sampling ratio between datasets
    _C.DATALOADER.USE_RFS = [False, False]
    _C.DATALOADER.MULTI_DATASET_GROUPING = False # Always true when multi-dataset is enabled
    _C.DATALOADER.DATASET_ANN = ['box', 'box'] # Annotation type of each dataset
    _C.DATALOADER.USE_DIFF_BS_SIZE = False # Use a different batch size for each dataset
    _C.DATALOADER.DATASET_BS = [8, 32] # Used when USE_DIFF_BS_SIZE is on
    _C.DATALOADER.DATASET_INPUT_SIZE = [896, 384] # Used when USE_DIFF_BS_SIZE is on
    _C.DATALOADER.DATASET_INPUT_SCALE = [(0.1, 2.0), (0.5, 1.5)] # Used when USE_DIFF_BS_SIZE is on
    _C.DATALOADER.DATASET_MIN_SIZES = [(640, 800), (320, 400)] # Used when USE_DIFF_BS_SIZE is on
    _C.DATALOADER.DATASET_MAX_SIZES = [1333, 667] # Used when USE_DIFF_BS_SIZE is on
    _C.DATALOADER.USE_TAR_DATASET = False # For ImageNet-21K: read images directly from tar files (without unzipping)
    _C.DATALOADER.TARFILE_PATH = 'datasets/imagenet/metadata-22k/tar_files.npy'
    _C.DATALOADER.TAR_INDEX_DIR = 'datasets/imagenet/metadata-22k/tarindex_npy'
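    # Sketch of assumed usage (hypothetical dataset names and ratios): the
    # per-dataset lists above (DATASET_RATIO, USE_RFS, DATASET_ANN, DATASET_BS,
    # ...) hold one entry per training dataset, in the same order as
    # DATASETS.TRAIN, e.g. for a box-labeled and an image-labeled dataset:
    #   cfg.DATASETS.TRAIN = ('lvis_v1_train', 'imagenet_lvis_v1')
    #   cfg.DATALOADER.DATASET_RATIO = [1, 4]
    #   cfg.DATALOADER.DATASET_ANN = ['box', 'image']
    #   cfg.DATALOADER.USE_RFS = [True, False]
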
    _C.SOLVER.USE_CUSTOM_SOLVER = False
    _C.SOLVER.OPTIMIZER = 'SGD'
    _C.SOLVER.BACKBONE_MULTIPLIER = 1.0 # Used in DETR
    _C.SOLVER.CUSTOM_MULTIPLIER = 1.0 # Used in DETR
    _C.SOLVER.CUSTOM_MULTIPLIER_NAME = [] # Used in DETR

    # Deformable DETR
    _C.MODEL.DETR = CN()
    _C.MODEL.DETR.NUM_CLASSES = 80
    _C.MODEL.DETR.FROZEN_WEIGHTS = '' # For Segmentation
    _C.MODEL.DETR.GIOU_WEIGHT = 2.0
    _C.MODEL.DETR.L1_WEIGHT = 5.0
    _C.MODEL.DETR.DEEP_SUPERVISION = True
    _C.MODEL.DETR.NO_OBJECT_WEIGHT = 0.1
    _C.MODEL.DETR.CLS_WEIGHT = 2.0
    _C.MODEL.DETR.NUM_FEATURE_LEVELS = 4
    _C.MODEL.DETR.TWO_STAGE = False
    _C.MODEL.DETR.WITH_BOX_REFINE = False
    _C.MODEL.DETR.FOCAL_ALPHA = 0.25
    _C.MODEL.DETR.NHEADS = 8
    _C.MODEL.DETR.DROPOUT = 0.1
    _C.MODEL.DETR.DIM_FEEDFORWARD = 2048
    _C.MODEL.DETR.ENC_LAYERS = 6
    _C.MODEL.DETR.DEC_LAYERS = 6
    _C.MODEL.DETR.PRE_NORM = False
    _C.MODEL.DETR.HIDDEN_DIM = 256
    _C.MODEL.DETR.NUM_OBJECT_QUERIES = 100
    _C.MODEL.DETR.USE_FED_LOSS = False
    _C.MODEL.DETR.WEAK_WEIGHT = 0.1

    _C.INPUT.CUSTOM_AUG = ''
    _C.INPUT.TRAIN_SIZE = 640
    _C.INPUT.TEST_SIZE = 640
    _C.INPUT.SCALE_RANGE = (0.1, 2.)
    # 'default': fixed short/long edge; 'square': max size = INPUT.TEST_SIZE
    _C.INPUT.TEST_INPUT_TYPE = 'default'
    _C.FIND_UNUSED_PARAM = True
    _C.EVAL_PRED_AR = False
    _C.EVAL_PROPOSAL_AR = False
    _C.EVAL_CAT_SPEC_AR = False
    _C.IS_DEBUG = False
    _C.QUICK_DEBUG = False
    _C.FP16 = False
    _C.EVAL_AP_FIX = False
    _C.GEN_PSEDO_LABELS = False
    _C.SAVE_DEBUG_PATH = 'output/save_debug/'
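

if __name__ == '__main__':
    # Minimal usage sketch (not part of the upstream file): build detectron2's
    # default config, register the Detic keys defined above, and inspect a few
    # of them. A project YAML merged afterwards (cfg.merge_from_file(...))
    # would override these defaults.
    from detectron2.config import get_cfg

    cfg = get_cfg()
    add_detic_config(cfg)
    print(cfg.WITH_IMAGE_LABELS)                        # False
    print(cfg.MODEL.ROI_BOX_HEAD.ZEROSHOT_WEIGHT_PATH)  # datasets/metadata/lvis_v1_clip_a+cname.npy
    print(cfg.DATALOADER.DATASET_RATIO)                 # [1, 1]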