Upload folder using huggingface_hub
- README.md +146 -1
- config.json +1 -1
- configuration_intern_vit.py +1 -1
- configuration_internvl_chat.py +1 -1
- modeling_intern_vit.py +1 -1
- modeling_phi3.py +9 -0
README.md
CHANGED
@@ -57,6 +57,8 @@ Limitations: Although we have made efforts to ensure the safety of the model dur

We provide an example code to run Mini-InternVL-Chat-4B-V1-5 using `transformers`.

+We also welcome you to experience the InternVL2 series models in our [online demo](https://internvl.opengvlab.com/). Currently, due to the limited GPU resources with public IP addresses, we can only deploy models up to a maximum of 26B. We will expand soon and deploy larger models to the online demo.
+
> Please use transformers==4.37.2 to ensure the model works normally.

```python
@@ -301,7 +303,150 @@ print(f'Assistant: {response}')

### LMDeploy

-
+LMDeploy is a toolkit for compressing, deploying, and serving LLMs, developed by the MMRazor and MMDeploy teams.
+
+```sh
+pip install lmdeploy
+```
+
+LMDeploy abstracts the complex inference process of multi-modal Vision-Language Models (VLM) into an easy-to-use pipeline, similar to the Large Language Model (LLM) inference pipeline.
+
+#### A 'Hello, world' example
+
+```python
+from lmdeploy import pipeline, PytorchEngineConfig, ChatTemplateConfig
+from lmdeploy.vl import load_image
+
+model = 'OpenGVLab/Mini-InternVL-Chat-4B-V1-5'
+image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
+chat_template_config = ChatTemplateConfig('internvl-phi3')
+pipe = pipeline(model, chat_template_config=chat_template_config,
+                backend_config=PytorchEngineConfig(session_len=8192))
+response = pipe(('describe this image', image))
+print(response.text)
+```
+
+If an `ImportError` occurs while running this example, please install the required dependency packages as prompted.
+
+#### Multi-image inference
+
+When dealing with multiple images, you can put them all in one list. Keep in mind that multiple images will lead to a higher number of input tokens, so the size of the context window typically needs to be increased.
+
+> Warning: Due to the scarcity of multi-image conversation data, performance on multi-image tasks may be unstable, and multiple attempts may be required to achieve satisfactory results.
+
+```python
+from lmdeploy import pipeline, PytorchEngineConfig, ChatTemplateConfig
+from lmdeploy.vl import load_image
+from lmdeploy.vl.constants import IMAGE_TOKEN
+
+model = 'OpenGVLab/Mini-InternVL-Chat-4B-V1-5'
+chat_template_config = ChatTemplateConfig('internvl-phi3')
+pipe = pipeline(model, chat_template_config=chat_template_config,
+                backend_config=PytorchEngineConfig(session_len=8192))
+
+image_urls=[
+    'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg',
+    'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/det.jpg'
+]
+
+images = [load_image(img_url) for img_url in image_urls]
+# Numbering images improves multi-image conversations
+response = pipe((f'Image-1: {IMAGE_TOKEN}\nImage-2: {IMAGE_TOKEN}\ndescribe these two images', images))
+print(response.text)
+```
+
+#### Batch prompts inference
+
+Conducting inference with batch prompts is quite straightforward; just place them within a list structure:
+
+```python
+from lmdeploy import pipeline, PytorchEngineConfig, ChatTemplateConfig
+from lmdeploy.vl import load_image
+
+model = 'OpenGVLab/Mini-InternVL-Chat-4B-V1-5'
+chat_template_config = ChatTemplateConfig('internvl-phi3')
+pipe = pipeline(model, chat_template_config=chat_template_config,
+                backend_config=PytorchEngineConfig(session_len=8192))
+
+image_urls=[
+    "https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg",
+    "https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/det.jpg"
+]
+prompts = [('describe this image', load_image(img_url)) for img_url in image_urls]
+response = pipe(prompts)
+print(response)
+```
+
+#### Multi-turn conversation
+
+There are two ways to do multi-turn conversations with the pipeline. One is to construct messages according to the OpenAI format and use the method introduced above; the other is to use the `pipeline.chat` interface.
+
+```python
+from lmdeploy import pipeline, PytorchEngineConfig, ChatTemplateConfig, GenerationConfig
+from lmdeploy.vl import load_image
+
+model = 'OpenGVLab/Mini-InternVL-Chat-4B-V1-5'
+chat_template_config = ChatTemplateConfig('internvl-phi3')
+pipe = pipeline(model, chat_template_config=chat_template_config,
+                backend_config=PytorchEngineConfig(session_len=8192))
+
+image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg')
+gen_config = GenerationConfig(top_k=40, top_p=0.8, temperature=0.8)
+sess = pipe.chat(('describe this image', image), gen_config=gen_config)
+print(sess.response.text)
+sess = pipe.chat('What is the woman doing?', session=sess, gen_config=gen_config)
+print(sess.response.text)
+```
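The added paragraph names two approaches but the example only shows the `pipeline.chat` interface. For reference, a minimal sketch of the first approach, which passes OpenAI-format messages directly to the pipeline and resends the accumulated history each turn. The message schema and the single-prompt return value are assumed from LMDeploy's GPT-4V-style prompt support; check the LMDeploy documentation if your version differs.

```python
from lmdeploy import pipeline, PytorchEngineConfig, ChatTemplateConfig

# Same pipeline setup as in the examples above.
pipe = pipeline('OpenGVLab/Mini-InternVL-Chat-4B-V1-5',
                chat_template_config=ChatTemplateConfig('internvl-phi3'),
                backend_config=PytorchEngineConfig(session_len=8192))

# First turn: an OpenAI-style user message carrying text plus an image URL.
messages = [{
    'role': 'user',
    'content': [
        {'type': 'text', 'text': 'describe this image'},
        {'type': 'image_url',
         'image_url': {'url': 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg'}},
    ],
}]
first = pipe(messages)
print(first.text)

# Second turn: append the assistant reply and the follow-up question,
# then resend the whole history so the model keeps the conversational context.
messages.append({'role': 'assistant', 'content': first.text})
messages.append({'role': 'user', 'content': 'What is the woman doing?'})
second = pipe(messages)
print(second.text)
```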
+
+#### Service
+
+LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
+
+```shell
+lmdeploy serve api_server OpenGVLab/Mini-InternVL-Chat-4B-V1-5 --model-name Mini-InternVL-Chat-4B-V1-5 --backend pytorch --server-port 23333
+```
+
+To use the OpenAI-style interface, you need to install OpenAI:
+
+```shell
+pip install openai
+```
+
+Then, use the code below to make the API call:
+
+```python
+from openai import OpenAI
+
+client = OpenAI(api_key='YOUR_API_KEY', base_url='http://0.0.0.0:23333/v1')
+model_name = client.models.list().data[0].id
+response = client.chat.completions.create(
+    model="Mini-InternVL-Chat-4B-V1-5",
+    messages=[{
+        'role':
+        'user',
+        'content': [{
+            'type': 'text',
+            'text': 'describe this image',
+        }, {
+            'type': 'image_url',
+            'image_url': {
+                'url':
+                'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/tiger.jpeg',
+            },
+        }],
+    }],
+    temperature=0.8,
+    top_p=0.8)
+print(response)
+```
+
+### vLLM
+
+TODO
+
+### Ollama
+
+TODO

## License

config.json
CHANGED
@@ -193,7 +193,7 @@
"tie_word_embeddings": false,
"tokenizer_class": null,
"top_k": 50,
-"top_p":
+"top_p": 1.0,
"torch_dtype": "bfloat16",
"torchscript": false,
"transformers_version": "4.37.2",
configuration_intern_vit.py
CHANGED
@@ -1,6 +1,6 @@
# --------------------------------------------------------
# InternVL
-# Copyright (c)
+# Copyright (c) 2024 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import os
configuration_internvl_chat.py
CHANGED
@@ -1,6 +1,6 @@
# --------------------------------------------------------
# InternVL
-# Copyright (c)
+# Copyright (c) 2024 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

modeling_intern_vit.py
CHANGED
@@ -1,6 +1,6 @@
# --------------------------------------------------------
# InternVL
-# Copyright (c)
+# Copyright (c) 2024 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from typing import Optional, Tuple, Union
modeling_phi3.py
CHANGED
@@ -53,6 +53,7 @@ try:
                                         unpad_input)

    _flash_supports_window_size = 'window_size' in list(inspect.signature(flash_attn_func).parameters)
+    has_flash_attn = True
except ImportError as error:
    logger.warning(
        f'`flash-attention` package not found, consider installing for better performance: {error}.'

@@ -61,6 +62,7 @@ except ImportError as error:
        logger.warning(
            "Current `flash-attenton` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`."
        )
+    has_flash_attn = False

_CHECKPOINT_FOR_DOC = 'microsoft/Phi-3-mini-4k-instruct'
_CONFIG_FOR_DOC = 'Phi3Config'

@@ -937,6 +939,12 @@ class Phi3PreTrainedModel(PreTrainedModel):

    _version = '0.0.5'

+    def __init__(self, config: Phi3Config):
+        if not has_flash_attn:
+            config._attn_implementation = 'eager'
+            print('Warning: Flash attention is not available, using eager attention instead.')
+        super().__init__(config)
+
    def _init_weights(self, module):
        std = self.config.initializer_range
        if isinstance(module, nn.Linear):

@@ -1042,6 +1050,7 @@ class Phi3Model(Phi3PreTrainedModel):
            [Phi3DecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
        )
        self._attn_implementation = config._attn_implementation
+
        self.norm = Phi3RMSNorm(config.hidden_size, eps=config.rms_norm_eps)

        self.gradient_checkpointing = False
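Taken together, these modeling_phi3.py changes follow a common optional-dependency pattern: probe for flash-attention at import time, record the result in a module-level flag (`has_flash_attn`), and downgrade to eager attention during model construction when the probe fails. A minimal, self-contained sketch of that pattern with purely illustrative names (not the model's actual classes):

```python
import importlib.util
import logging

logger = logging.getLogger(__name__)

# Import-time probe: the flag records whether the optional package is importable.
HAS_FLASH_ATTN = importlib.util.find_spec('flash_attn') is not None
if not HAS_FLASH_ATTN:
    logger.warning('`flash-attention` not found; falling back to eager attention.')


class ToyModelConfig:
    """Illustrative config holding only the attention backend choice."""

    def __init__(self, attn_implementation: str = 'flash_attention_2'):
        self.attn_implementation = attn_implementation


class ToyModel:
    """Illustrative model that downgrades its backend when flash-attn is missing."""

    def __init__(self, config: ToyModelConfig):
        if config.attn_implementation == 'flash_attention_2' and not HAS_FLASH_ATTN:
            # Mirror the diff: switch to the always-available eager backend.
            config.attn_implementation = 'eager'
        self.attn_implementation = config.attn_implementation


if __name__ == '__main__':
    print(ToyModel(ToyModelConfig()).attn_implementation)
```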