cuierfei committed on
Commit
b1b3978
1 Parent(s): 8f9ec35

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +2 -2
  2. config.json +0 -56
  3. modeling_intern_vit.py +1 -0
  4. modeling_internvl_chat.py +1 -0
README.md CHANGED
@@ -65,7 +65,7 @@ For more information about the pipeline parameters, please refer to [here](https
65
  LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
66
 
67
  ```shell
68
- lmdeploy serve api_server OpenGVLab/InternVL2-8B-AWQ --server-port 23333
69
  ```
70
 
71
  To use the OpenAI-style interface, you need to install OpenAI:
@@ -104,7 +104,7 @@ print(response)
104
 
105
  ## License
106
 
107
- This project is released under the MIT license, while InternLM is licensed under the Apache-2.0 license.
108
 
109
  ## Citation
110
 
 
65
  LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
66
 
67
  ```shell
68
+ lmdeploy serve api_server OpenGVLab/InternVL2-8B-AWQ --backend turbomind --server-port 23333 --model-format awq
69
  ```
70
 
71
  To use the OpenAI-style interface, you need to install OpenAI:
 
104
 
105
  ## License
106
 
107
+ This project is released under the MIT license, while InternLM2 is licensed under the Apache-2.0 license.
108
 
109
  ## Citation
110
 
config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "_commit_hash": null,
3
- "_name_or_path": "/mnt/bigdisk/InternVL2-8B",
4
  "architectures": [
5
  "InternVLChatModel"
6
  ],
@@ -116,91 +115,36 @@
116
  "select_layer": -1,
117
  "template": "internlm2-chat",
118
  "torch_dtype": "float16",
119
- "transformers_version": null,
120
  "use_backbone_lora": 0,
121
  "use_llm_lora": 0,
122
  "use_thumbnail": true,
123
  "vision_config": {
124
- "_name_or_path": "",
125
- "add_cross_attention": false,
126
  "architectures": [
127
  "InternVisionModel"
128
  ],
129
  "attention_dropout": 0.0,
130
- "bad_words_ids": null,
131
- "begin_suppress_tokens": null,
132
- "bos_token_id": null,
133
- "chunk_size_feed_forward": 0,
134
- "cross_attention_hidden_size": null,
135
- "decoder_start_token_id": null,
136
- "diversity_penalty": 0.0,
137
- "do_sample": false,
138
  "drop_path_rate": 0.0,
139
  "dropout": 0.0,
140
- "early_stopping": false,
141
- "encoder_no_repeat_ngram_size": 0,
142
- "eos_token_id": null,
143
- "exponential_decay_length_penalty": null,
144
- "finetuning_task": null,
145
- "forced_bos_token_id": null,
146
- "forced_eos_token_id": null,
147
  "hidden_act": "gelu",
148
  "hidden_size": 1024,
149
- "id2label": {
150
- "0": "LABEL_0",
151
- "1": "LABEL_1"
152
- },
153
  "image_size": 448,
154
  "initializer_factor": 1.0,
155
  "initializer_range": 0.02,
156
  "intermediate_size": 4096,
157
- "is_decoder": false,
158
- "is_encoder_decoder": false,
159
- "label2id": {
160
- "LABEL_0": 0,
161
- "LABEL_1": 1
162
- },
163
  "layer_norm_eps": 1e-06,
164
- "length_penalty": 1.0,
165
- "max_length": 20,
166
- "min_length": 0,
167
  "model_type": "intern_vit_6b",
168
- "no_repeat_ngram_size": 0,
169
  "norm_type": "layer_norm",
170
  "num_attention_heads": 16,
171
- "num_beam_groups": 1,
172
- "num_beams": 1,
173
  "num_channels": 3,
174
  "num_hidden_layers": 24,
175
- "num_return_sequences": 1,
176
  "output_attentions": false,
177
  "output_hidden_states": false,
178
- "output_scores": false,
179
- "pad_token_id": null,
180
  "patch_size": 14,
181
- "prefix": null,
182
- "problem_type": null,
183
- "pruned_heads": {},
184
  "qk_normalization": false,
185
  "qkv_bias": true,
186
- "remove_invalid_values": false,
187
- "repetition_penalty": 1.0,
188
  "return_dict": true,
189
- "return_dict_in_generate": false,
190
- "sep_token_id": null,
191
- "suppress_tokens": null,
192
- "task_specific_params": null,
193
- "temperature": 1.0,
194
- "tf_legacy_loss": false,
195
- "tie_encoder_decoder": false,
196
- "tie_word_embeddings": true,
197
- "tokenizer_class": null,
198
- "top_k": 50,
199
- "top_p": 1.0,
200
  "torch_dtype": "bfloat16",
201
- "torchscript": false,
202
  "transformers_version": "4.40.0",
203
- "typical_p": 1.0,
204
  "use_bfloat16": true,
205
  "use_flash_attn": true
206
  }
 
1
  {
2
  "_commit_hash": null,
 
3
  "architectures": [
4
  "InternVLChatModel"
5
  ],
 
115
  "select_layer": -1,
116
  "template": "internlm2-chat",
117
  "torch_dtype": "float16",
 
118
  "use_backbone_lora": 0,
119
  "use_llm_lora": 0,
120
  "use_thumbnail": true,
121
  "vision_config": {
 
 
122
  "architectures": [
123
  "InternVisionModel"
124
  ],
125
  "attention_dropout": 0.0,
 
 
 
 
 
 
 
 
126
  "drop_path_rate": 0.0,
127
  "dropout": 0.0,
 
 
 
 
 
 
 
128
  "hidden_act": "gelu",
129
  "hidden_size": 1024,
 
 
 
 
130
  "image_size": 448,
131
  "initializer_factor": 1.0,
132
  "initializer_range": 0.02,
133
  "intermediate_size": 4096,
 
 
 
 
 
 
134
  "layer_norm_eps": 1e-06,
 
 
 
135
  "model_type": "intern_vit_6b",
 
136
  "norm_type": "layer_norm",
137
  "num_attention_heads": 16,
 
 
138
  "num_channels": 3,
139
  "num_hidden_layers": 24,
 
140
  "output_attentions": false,
141
  "output_hidden_states": false,
 
 
142
  "patch_size": 14,
 
 
 
143
  "qk_normalization": false,
144
  "qkv_bias": true,
 
 
145
  "return_dict": true,
 
 
 
 
 
 
 
 
 
 
 
146
  "torch_dtype": "bfloat16",
 
147
  "transformers_version": "4.40.0",
 
148
  "use_bfloat16": true,
149
  "use_flash_attn": true
150
  }
modeling_intern_vit.py CHANGED
@@ -368,6 +368,7 @@ class InternVisionEncoder(nn.Module):
368
 
369
  class InternVisionModel(PreTrainedModel):
370
  main_input_name = 'pixel_values'
 
371
  config_class = InternVisionConfig
372
  _no_split_modules = ['InternVisionEncoderLayer']
373
 
 
368
 
369
  class InternVisionModel(PreTrainedModel):
370
  main_input_name = 'pixel_values'
371
+ _supports_flash_attn_2 = True
372
  config_class = InternVisionConfig
373
  _no_split_modules = ['InternVisionEncoderLayer']
374
 
modeling_internvl_chat.py CHANGED
@@ -35,6 +35,7 @@ def version_cmp(v1, v2, op='eq'):
35
  class InternVLChatModel(PreTrainedModel):
36
  config_class = InternVLChatConfig
37
  main_input_name = 'pixel_values'
 
38
  _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer', 'InternLM2DecoderLayer']
39
 
40
  def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None):
 
35
  class InternVLChatModel(PreTrainedModel):
36
  config_class = InternVLChatConfig
37
  main_input_name = 'pixel_values'
38
+ _supports_flash_attn_2 = True
39
  _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer', 'InternLM2DecoderLayer']
40
 
41
  def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None):