from transformers import PretrainedConfig


class PATHViTConfig(PretrainedConfig):
    """Hugging Face configuration object for the PATH ViT-B/16 backbone.

    Every keyword argument below is stored verbatim as an instance
    attribute of the same name; no validation or transformation is
    performed here.  Remaining ``**kwargs`` are forwarded to
    :class:`~transformers.PretrainedConfig`.

    Key arguments (defaults in the signature):
        img_size / patch_size / in_chans: input geometry of the patch embed.
        embed_dim / depth / num_heads / mlp_ratio / qkv_bias: transformer shape.
        drop_path_rate: stochastic-depth rate.
        norm_layer: NOTE — kept for compatibility only; it is not used
            when building the model.  ``norm_layer_eps`` is what matters.
        window / interval: windowed-attention layout controls.
        use_abs_pos_emb / learnable_pos / rel_pos_spatial / test_pos_mode:
            positional-embedding behaviour.
        task_sp_list / neck_sp_list: names of task-/neck-specific parameters.
        lms_checkpoint_train / freeze_iters: training-time controls.
        act_layer: activation class name (string), e.g. ``"GELU"``.
        pre_ln / ending_norm / mask_input / round_padding / compat /
        use_cls_token / pad_attn_mask / prompt: assorted architecture flags.
    """

    model_type = "vit-b16"

    def __init__(
        self,
        img_size=224,
        patch_size=16,
        in_chans=3,
        num_classes=80,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4.0,
        qkv_bias=True,
        drop_path_rate=0.1,
        norm_layer=None,
        norm_layer_eps=1e-6,
        window=True,
        use_abs_pos_emb=True,
        interval=3,
        test_pos_mode=False,
        task_sp_list=(),
        neck_sp_list=(),
        learnable_pos=False,
        rel_pos_spatial=False,
        lms_checkpoint_train=False,
        prompt=None,
        pad_attn_mask=False,
        freeze_iters=0,
        act_layer="GELU",
        pre_ln=False,
        mask_input=False,
        ending_norm=True,
        round_padding=False,
        compat=False,
        use_cls_token=False,
        **kwargs,
    ):
        # Snapshot the named constructor arguments before any new local
        # variables exist; comprehension variables live in their own scope,
        # so this captures exactly the parameters above.
        named_args = {
            key: value
            for key, value in locals().items()
            if key not in ("self", "kwargs")
        }
        super().__init__(**kwargs)
        # Mirror each named argument onto the instance, in declaration order.
        for key, value in named_args.items():
            setattr(self, key, value)