Vision-CAIR committed
Commit • efcffe8
1 Parent(s): 92fb897
Upload folder using huggingface_hub

Browse files:
- modeling_llama_v2.py +0 -1
- registry.py +16 -19
- utils.py +0 -133
modeling_llama_v2.py
CHANGED
@@ -9,7 +9,6 @@ from transformers.utils import add_start_docstrings_to_model_forward, replace_re
 from transformers.modeling_outputs import CausalLMOutputWithPast
 from transformers.models.llama.modeling_llama import LLAMA_INPUTS_DOCSTRING, _CONFIG_FOR_DOC
 from transformers.models.llama.modeling_llama import LlamaForCausalLM as LlamaForCausalLMOrig
-# from minigpt4_video.models.transformers.src.transformers.models.llama.modeling_llama import LlamaForCausalLM as LlamaForCausalLMOrig
 
 class LlamaForCausalLM(LlamaForCausalLMOrig):
 
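For context, the class touched by this hunk subclasses the stock transformers LlamaForCausalLM, so it can be constructed like any other causal LM. A minimal, hypothetical usage sketch (the module path and checkpoint id are placeholders, not taken from this commit):

    # Hypothetical sketch: loading the customized class defined in modeling_llama_v2.py.
    # The checkpoint id is a placeholder; substitute whatever weights this repo actually ships.
    from modeling_llama_v2 import LlamaForCausalLM

    model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")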
registry.py
CHANGED
@@ -26,24 +26,21 @@ class Registry:
         Args:
             name: Key with which the task will be registered.
 
-        Usage:
-
-            from minigpt4.common.registry import registry
         """
 
         def wrap(model_cls):
-
+            from .base_model import BaseModel
 
-
-
-
+            assert issubclass(
+                model_cls, BaseModel
+            ), "All models must inherit BaseModel class"
 
-
-
-
-
-
-
+            if name in cls.mapping["model_name_mapping"]:
+                raise KeyError(
+                    "Name '{}' already registered for {}.".format(
+                        name, cls.mapping["model_name_mapping"][name]
+                    )
+                )
             cls.mapping["model_name_mapping"][name] = model_cls
             return model_cls
 
@@ -58,7 +55,7 @@ class Registry:
 
         Usage:
 
-            from
+            from .registry import registry
         """
 
         def wrap(processor_cls):
@@ -87,7 +84,7 @@ class Registry:
 
         Usage:
 
-            from
+            from .registry import registry
         """
 
         def wrap(lr_sched_cls):
@@ -111,7 +108,7 @@ class Registry:
 
         Usage:
 
-
+            .common.registry import registry
         """
 
         def wrap(runner_cls):
@@ -135,7 +132,7 @@ class Registry:
 
         Usage:
 
-            from
+            from .registry import registry
         """
         assert isinstance(path, str), "All path must be str."
         if name in cls.mapping["paths"]:
@@ -151,7 +148,7 @@ class Registry:
 
         Usage::
 
-            from
+            from .registry import registry
 
            registry.register("config", {})
         """
@@ -260,7 +257,7 @@ class Registry:
             name: Key which needs to be removed.
         Usage::
 
-            from
+            from registry import registry
 
            config = registry.unregister("config")
         """
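The assertion added to register_model means anything passed to the decorator must subclass the repo's BaseModel. A minimal sketch of the intended call pattern, assuming the flat module layout implied by the new relative imports; the key "my_model" and the class MyModel are made up for illustration:

    # Illustrative only: registering a model class so it passes the new issubclass check.
    from registry import registry          # assumed import path for this repo's layout
    from base_model import BaseModel       # module name inferred from ".base_model" above

    @registry.register_model("my_model")   # hypothetical registry key
    class MyModel(BaseModel):
        pass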
utils.py
CHANGED
@@ -468,136 +468,3 @@ def merge_vision_embeddings(input_ids: torch.Tensor,
     inputs_embeds[mask] = torch.cat(vision_embeddings)
 
     return inputs_embeds
-
-
-class LayerFn(Protocol):
-
-    def __call__(
-        self,
-        prefix="",
-    ) -> torch.nn.Module:
-        ...
-
-
-class PPMissingLayer(torch.nn.Identity):
-    """
-    A placeholder layer for missing layers in a pipeline parallel model.
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__()
-
-
-_CPU_OFFLOAD_BYTES = 0
-_CPU_OFFLOAD_MAX_BYTES = 0
-
-
-def set_cpu_offload_max_bytes(max_bytes: int) -> None:
-    global _CPU_OFFLOAD_MAX_BYTES, _CPU_OFFLOAD_BYTES
-    _CPU_OFFLOAD_BYTES = 0
-    _CPU_OFFLOAD_MAX_BYTES = max_bytes
-
-
-def maybe_offload_to_cpu(module: torch.nn.Module) -> torch.nn.Module:
-    device = next(module.parameters()).device
-
-    if device == torch.device("cpu"):
-        return module
-
-    global _CPU_OFFLOAD_MAX_BYTES, _CPU_OFFLOAD_BYTES
-    if _CPU_OFFLOAD_BYTES >= _CPU_OFFLOAD_MAX_BYTES:
-        return module
-
-    pin_memory = is_pin_memory_available()
-
-    # offload parameters to CPU
-    # use pin_memory if possible, which helps cudagraph capture speed
-    for p in module.parameters():
-        if _CPU_OFFLOAD_BYTES >= _CPU_OFFLOAD_MAX_BYTES:
-            # we use per-parameter offloading
-            # one module might have some parameters offloaded and some not
-            break
-
-        # `torch.empty_like` does not support `pin_memory` argument
-        cpu_data = torch.empty(size=p.data.size(),
-                               dtype=p.data.dtype,
-                               layout=p.data.layout,
-                               device='cpu',
-                               pin_memory=pin_memory)
-        cpu_data.copy_(p.data)
-        p.data = cpu_data
-        _CPU_OFFLOAD_BYTES += p.data.numel() * p.data.element_size()
-
-    state_dict: Dict[str, torch.Tensor] = module.state_dict()
-
-    original_forward = module.forward
-
-    def forward(*args, **kwargs):
-        module.forward = original_forward
-        device_state = {
-            # here we blindly call `to(device)`
-            # if the parameter is already on the device, it will be a no-op
-            k: v.to(device, non_blocking=True)
-            for k, v in state_dict.items()
-        }
-        output = functional_call(module,
-                                 device_state,
-                                 args=args,
-                                 kwargs=kwargs)
-        module.forward = forward
-        return output
-
-    module.forward = forward
-
-    return module
-
-
-def make_layers(
-    num_hidden_layers: int,
-    layer_fn: LayerFn,
-    prefix: str,
-) -> Tuple[int, int, torch.nn.ModuleList]:
-    """Make a list of layers with the given layer function, taking
-    pipeline parallelism into account.
-    """
-    from vllm.distributed.parallel_state import get_pp_group
-    from vllm.distributed.utils import get_pp_indices
-    start_layer, end_layer = get_pp_indices(num_hidden_layers,
-                                            get_pp_group().rank_in_group,
-                                            get_pp_group().world_size)
-    modules = torch.nn.ModuleList(
-        [PPMissingLayer() for _ in range(start_layer)] + [
-            maybe_offload_to_cpu(layer_fn(prefix=f"{prefix}.{idx}"))
-            for idx in range(start_layer, end_layer)
-        ] + [PPMissingLayer() for _ in range(end_layer, num_hidden_layers)])
-    return start_layer, end_layer, modules
-
-
-# NOTE: don't use lru_cache here because it can prevent garbage collection
-_model_to_pp_missing_layer_names: Dict[int, List[str]] = {}
-
-
-def get_pp_missing_layer_names(model: torch.nn.Module) -> List[str]:
-    """Get the names of the missing layers in a pipeline parallel model."""
-    model_id = id(model)
-    if model_id in _model_to_pp_missing_layer_names:
-        return _model_to_pp_missing_layer_names[model_id]
-
-    missing_layer_names = []
-    for name, module in model.named_modules():
-        if isinstance(module, PPMissingLayer):
-            # NOTE: the trailing dot is used to match the prefix of the layer.
-            # without the dot, we could match a layer that is not missing,
-            # e.g., 'encoder.layer.1' would match 'encoder.layer.11'
-            missing_layer_names.append(name + '.')
-    _model_to_pp_missing_layer_names[model_id] = missing_layer_names
-
-    return missing_layer_names
-
-
-def is_pp_missing_parameter(name: str, model: torch.nn.Module) -> bool:
-    """Check if a parameter is missing in a pipeline parallel model."""
-    for missing_layer_name in get_pp_missing_layer_names(model):
-        if name.startswith(missing_layer_name):
-            return True
-    return False
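Although this commit deletes them, the CPU-offload helpers above were designed to be used together: set a global byte budget, then wrap GPU-resident modules so their weights live in pinned CPU memory and are streamed back on each forward call. A small sketch of that pattern, reusing the names from the removed code (it assumes a CUDA device and that the helpers are still importable):

    # Illustrative sketch of the removed helpers' intended use; not part of this commit.
    import torch

    set_cpu_offload_max_bytes(2 * 1024**3)      # allow up to ~2 GiB of weights on the CPU
    layer = torch.nn.Linear(4096, 4096).cuda()  # any GPU-resident module
    layer = maybe_offload_to_cpu(layer)         # weights move to pinned CPU memory;
                                                # the patched forward() copies them back per call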