benchang1110 committed
Commit f1118e2
Parent: 80e67c4

Upload config

Files changed (2):
  1. config.json +3 -0
  2. configuration_taivisionlm.py +110 -0
config.json CHANGED
@@ -1,4 +1,7 @@
 {
+  "auto_map": {
+    "AutoConfig": "configuration_taivisionlm.TaiVisionLMConfig"
+  },
   "hidden_size": 2048,
   "ignore_index": -100,
   "image_token_index": 32000,
configuration_taivisionlm.py ADDED
@@ -0,0 +1,110 @@
+"""TaiVisionLM configuration"""
+
+import warnings
+
+from transformers import CONFIG_MAPPING, PretrainedConfig, logging
+
+logger = logging.get_logger(__name__)
+
+
+class TaiVisionLMConfig(PretrainedConfig):
+    model_type = "taivisionlm"
+    is_composition = False
+
+    def __init__(
+        self,
+        vision_config=None,
+        text_config=None,
+        ignore_index=-100,
+        image_token_index=32000,
+        vocab_size=32001,
+        projection_dim=768,
+        hidden_size=2048,
+        **kwargs,
+    ):
+        self.ignore_index = ignore_index
+        self.image_token_index = image_token_index
+        self._vocab_size = vocab_size
+        self.projection_dim = projection_dim
+        self.hidden_size = hidden_size
+        self.vision_config = vision_config
+        self.is_encoder_decoder = False
+
+        # Vision tower: accept a config dict (rebuilt through CONFIG_MAPPING)
+        # or fall back to a SigLIP base configuration.
+        if isinstance(self.vision_config, dict):
+            vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model")
+            self.vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config)
+        elif vision_config is None:
+            self.vision_config = CONFIG_MAPPING["siglip_vision_model"](
+                attention_dropout=0.0,
+                hidden_act="gelu_pytorch_tanh",
+                hidden_size=768,
+                image_size=224,
+                intermediate_size=3072,
+                layer_norm_eps=1e-06,
+                num_attention_heads=12,
+                num_channels=3,
+                num_hidden_layers=12,
+                patch_size=16,
+            )
+
+        self.vocab_size = vocab_size
+        self.text_config = text_config
+
+        # Language model: same pattern, defaulting to a TinyLlama-sized Llama config.
+        if isinstance(self.text_config, dict):
+            text_config["model_type"] = text_config.get("model_type", "gpt2")
+            self.text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
+        elif text_config is None:
+            self.text_config = CONFIG_MAPPING["llama"](
+                architectures=["LlamaForCausalLM"],
+                hidden_act="silu",
+                attention_bias=False,
+                attention_dropout=0.0,
+                bos_token_id=1,
+                eos_token_id=2,
+                hidden_size=2048,
+                initializer_range=0.02,
+                intermediate_size=5632,
+                max_position_embeddings=2048,
+                model_type="llama",
+                num_attention_heads=32,
+                num_hidden_layers=22,
+                num_key_value_heads=4,
+                pretraining_tp=1,
+                rms_norm_eps=1e-05,
+                rope_scaling=None,
+                rope_theta=10000.0,
+                tie_word_embeddings=False,
+                torch_dtype="bfloat16",
+                transformers_version="4.40.2",
+                use_cache=True,
+                vocab_size=32000,
+            )
+
+        # One image token per patch: (image_size / patch_size) ** 2.
+        self.num_image_tokens = (self.vision_config.image_size // self.vision_config.patch_size) ** 2
+        self.pad_token_id = self.text_config.pad_token_id
+        self.vision_config.projection_dim = projection_dim
+        super().__init__(**kwargs)
+
+    @property
+    def vocab_size(self):
+        warnings.warn(
+            "The `vocab_size` attribute is deprecated and will be removed in v4.44. "
+            "Please use `text_config.vocab_size` instead.",
+            FutureWarning,
+        )
+        return self._vocab_size
+
+    @vocab_size.setter
+    def vocab_size(self, value):
+        self._vocab_size = value
+
+    def to_dict(self):
+        # Drop the private backing field so the deprecated alias is not serialized.
+        output = super().to_dict()
+        output.pop("_vocab_size", None)
+        return output
+
+
+if __name__ == "__main__":
+    # register_for_auto_class is what writes the "auto_map" entry
+    # seen in config.json above.
+    config = TaiVisionLMConfig()
+    TaiVisionLMConfig.register_for_auto_class()
+    config.push_to_hub("benchang1110/TaiVision-base")
+    config.save_pretrained("./")
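
For a quick local sanity check of the round-trip (a sketch, assuming configuration_taivisionlm.py is importable from the working directory): save_pretrained serializes the nested configs as plain dicts, and __init__ rebuilds them through CONFIG_MAPPING on reload.

    import tempfile

    from configuration_taivisionlm import TaiVisionLMConfig

    config = TaiVisionLMConfig()
    # 224 // 16 = 14 patches per side -> 196 image tokens per image.
    assert config.num_image_tokens == 196

    with tempfile.TemporaryDirectory() as tmp:
        config.save_pretrained(tmp)
        reloaded = TaiVisionLMConfig.from_pretrained(tmp)

    # Nested dicts come back as full config objects via their model_type keys.
    assert reloaded.vision_config.patch_size == 16
    assert reloaded.text_config.num_hidden_layers == 22
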