# path_vitbase_size_256_128 / configuration_path.py
# Provenance: Hugging Face upload by tuandunghcmut — "Upload model" (commit af24273, verified)
from transformers import PretrainedConfig
class PATHViTConfig(PretrainedConfig):
    """Configuration for the PATH ViT-B/16 backbone.

    Holds the architectural hyper-parameters of the vision transformer
    (patch embedding, depth, attention heads, positional-embedding options,
    windowing, checkpointing flags, etc.). Any extra keyword arguments are
    forwarded unchanged to ``PretrainedConfig``.
    """

    model_type = "vit-b16"

    def __init__(
        self,
        img_size=224,
        patch_size=16,
        in_chans=3,
        num_classes=80,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4.0,
        qkv_bias=True,
        drop_path_rate=0.1,
        norm_layer=None,
        norm_layer_eps=1e-6,
        window=True,
        use_abs_pos_emb=True,
        interval=3,
        test_pos_mode=False,
        task_sp_list=(),
        neck_sp_list=(),
        learnable_pos=False,
        rel_pos_spatial=False,
        lms_checkpoint_train=False,
        prompt=None,
        pad_attn_mask=False,
        freeze_iters=0,
        act_layer="GELU",
        pre_ln=False,
        mask_input=False,
        ending_norm=True,
        round_padding=False,
        compat=False,
        use_cls_token=False,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Bind every architectural hyper-parameter onto the config instance.
        # NOTE: norm_layer is stored but not used for building the model.
        hyper_params = dict(
            img_size=img_size,
            patch_size=patch_size,
            in_chans=in_chans,
            num_classes=num_classes,
            embed_dim=embed_dim,
            depth=depth,
            num_heads=num_heads,
            mlp_ratio=mlp_ratio,
            qkv_bias=qkv_bias,
            drop_path_rate=drop_path_rate,
            norm_layer=norm_layer,
            norm_layer_eps=norm_layer_eps,
            window=window,
            use_abs_pos_emb=use_abs_pos_emb,
            interval=interval,
            test_pos_mode=test_pos_mode,
            task_sp_list=task_sp_list,
            neck_sp_list=neck_sp_list,
            learnable_pos=learnable_pos,
            rel_pos_spatial=rel_pos_spatial,
            lms_checkpoint_train=lms_checkpoint_train,
            prompt=prompt,
            pad_attn_mask=pad_attn_mask,
            freeze_iters=freeze_iters,
            act_layer=act_layer,
            pre_ln=pre_ln,
            mask_input=mask_input,
            ending_norm=ending_norm,
            round_padding=round_padding,
            compat=compat,
            use_cls_token=use_cls_token,
        )
        for attr_name, attr_value in hyper_params.items():
            setattr(self, attr_name, attr_value)