runningSnail committed: update modeling code
Commit 6ad73d9 • 1 parent: 34728e2

Files changed:
- README.md (+58, -15)
- modeling_dolphin.py (+57, -10)
README.md
CHANGED

@@ -48,21 +48,64 @@ from transformers import AutoTokenizer
 from configuration_dolphin import DolphinForCausalLM
 import time

(15 blank lines removed)
+AutoConfig.register("dolphin", DolphinConfig)
+AutoModelForCausalLM.register(DolphinConfig, DolphinForCausalLM)
+
+MEMORY_SIZE = 32
+def inference_instruct(mycontext, question, device="cuda:0"):
+    start = time.time()
+    generated_token_ids = []
+    # Build the prompt around the user question; the <context> marker splits it
+    # into the chunks before and after the reserved memory slots
+    prompt = f" <context>{question}"
+    print("input prompt: " + prompt)
+    print("input context: " + mycontext)
+    text_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split("<context>")]
+    # Reserve MEMORY_SIZE positions (sentinel id -1) between the two prompt chunks
+    input_ids = (
+        torch.tensor(text_chunks[0] + [-1] * MEMORY_SIZE + text_chunks[1], dtype=torch.long)
+        .unsqueeze(0)
+        .to(device)
+    )
+    # Tokenize the context together with the 32 memory placeholder tokens
+    context_tokenized = tokenizer(
+        mycontext + "".join([f"[memory_{i}]" for i in range(MEMORY_SIZE)]),
+        return_tensors="pt",
+    )
+    context_tokenized = {k: v.to(device) for k, v in context_tokenized.items()}
+    context_token_count = context_tokenized["input_ids"].shape[1] - MEMORY_SIZE
+    print("length of context: " + str(context_token_count) + " tokens")
+    # Greedy token-by-token decoding loop
+    for i in range(context_token_count):
+        print(f"\rGenerating token {i+1}/{context_token_count}", end="")
+        next_token = (
+            model(
+                input_ids,
+                context_input_ids=context_tokenized["input_ids"],
+                context_attention_mask=context_tokenized["attention_mask"],
+            )
+            .logits[:, -1]
+            .argmax(-1)
+        )
+        if next_token.item() == 151643:  # stop at the tokenizer's end-of-text id
+            break
+        generated_token_ids.append(next_token.item())
+        input_ids = torch.cat([input_ids, next_token.unsqueeze(1)], dim=-1)
+    print("\noutput: " + tokenizer.decode(generated_token_ids))
+    end = time.time()
+    print(f"Elapsed time: {end - start:.2f}s")
+
+
+# Load the tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True).to("cuda:0")
+
+# Run inference examples
+mycontext = "Nexa AI is a Cupertino-based company founded in May 2023 that researches and develops models and tools for on-device AI applications. The company is founded by Alex and Zack. The company is known for its Octopus-series models, which rival large-scale language models in capabilities such as function-calling, multimodality, and action-planning, while remaining efficient and compact for edge device deployment. Nexa AI's mission is to advance on-device AI in collaboration with the global developer community. To this end, the company has created an on-device model hub for users to find, share, and collaborate on open-source AI models optimized for edge devices, as well as an SDK for developers to run and deploy AI models locally"
+inference_instruct(mycontext, "who founded Nexa AI?")
+inference_instruct(mycontext, "what is the mission of Nexa AI?")
+inference_instruct(mycontext, "what is the performance of Octopus V2 and V3?")
+inference_instruct(mycontext, "when is Nexa AI founded?")
 ```

 ## Training Process
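A note on two assumptions the README example above makes: the decoding loop stops on the hard-coded token id 151643, which is the `<|endoftext|>` id in the Qwen2 tokenizer family this model builds on, and every input tensor is moved to `cuda:0`, so the model must live on that device as well. Below is a minimal sketch of how one might make both explicit with the standard `transformers` API; the checkpoint name comes from the example, while the rest is an illustrative assumption rather than part of this commit.

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load the remote-code checkpoint and keep the model on the same device as the inputs
tokenizer = AutoTokenizer.from_pretrained("NexaAIDev/Dolphin", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("NexaAIDev/Dolphin", trust_remote_code=True).to(device)

# Read the stop token from the tokenizer instead of hard-coding 151643
stop_id = tokenizer.eos_token_id
print("stop token id:", stop_id, "->", tokenizer.decode([stop_id]))
```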
modeling_dolphin.py
CHANGED

@@ -15,13 +15,14 @@ from transformers.modeling_attn_mask_utils import (
     AttentionMaskConverter,
 )
 from transformers.models.qwen2.modeling_qwen2 import Qwen2DecoderLayer
+from transformers.models.qwen2.configuration_qwen2 import Qwen2Config
 import torch
 import torch.nn as nn
 from typing import List, Optional, Tuple, Union
 import warnings
 from dataclasses import dataclass
 from torch.nn import CrossEntropyLoss
-from .configuration_dolphin import encoder_config_dict,
+from .configuration_dolphin import encoder_config_dict, DolphinConfig

 CONTEXT_EMB = 896  # Qwen 0.7B has dimension of 896
 HIDDEN_EMB = 3584  # Qwen 7B has dimension of 3584

@@ -187,7 +188,7 @@ class DolphinModel(Qwen2PreTrainedModel):
     """
     config_class = DolphinConfig

-    def __init__(self, config:
+    def __init__(self, config: DolphinConfig):
         super().__init__(config)
         self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size

@@ -731,17 +732,63 @@ class DolphinForCausalLM(Qwen2PreTrainedModel):
         )
         return reordered_past

+MEMORY_SIZE = 32
+def inference_instruct(mycontext, question, device="cuda:0"):
+    import time
+    start = time.time()
+    generated_token_ids = []
+    # Build the prompt around the user question; the <context> marker splits it
+    # into the chunks before and after the reserved memory slots
+    prompt = f" <context>{question}"
+    print("input prompt: " + prompt)
+    print("input context: " + mycontext)
+    text_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split("<context>")]
+    # Reserve MEMORY_SIZE positions (sentinel id -1) between the two prompt chunks
+    input_ids = (
+        torch.tensor(text_chunks[0] + [-1] * MEMORY_SIZE + text_chunks[1], dtype=torch.long)
+        .unsqueeze(0)
+        .to(device)
+    )
+    # Tokenize the context together with the 32 memory placeholder tokens
+    context_tokenized = tokenizer(
+        mycontext + "".join([f"[memory_{i}]" for i in range(MEMORY_SIZE)]),
+        return_tensors="pt",
+    )
+    context_tokenized = {k: v.to(device) for k, v in context_tokenized.items()}
+    context_token_count = context_tokenized["input_ids"].shape[1] - MEMORY_SIZE
+    print("length of context: " + str(context_token_count) + " tokens")
+    # Greedy token-by-token decoding loop
+    for i in range(context_token_count):
+        print(f"\rGenerating token {i+1}/{context_token_count}", end="")
+        next_token = (
+            model(
+                input_ids,
+                context_input_ids=context_tokenized["input_ids"],
+                context_attention_mask=context_tokenized["attention_mask"],
+            )
+            .logits[:, -1]
+            .argmax(-1)
+        )
+        if next_token.item() == 151643:  # stop at the tokenizer's end-of-text id
+            break
+        generated_token_ids.append(next_token.item())
+        input_ids = torch.cat([input_ids, next_token.unsqueeze(1)], dim=-1)
+    print("\noutput: " + tokenizer.decode(generated_token_ids))
+    end = time.time()
+    print(f"Elapsed time: {end - start:.2f}s")
+
+
 if __name__ == "__main__":
     # Register your configuration and model
     AutoConfig.register("dolphin", DolphinConfig)
     AutoModelForCausalLM.register(DolphinConfig, DolphinForCausalLM)

-    # Ensure that your config is loaded with the correct model type
-    config = DolphinConfig(encoder_config=encoder_config_dict)
-
-    # Save the configuration (if needed)
-    config.save_pretrained("dolphin")
-
     # Load the tokenizer and model
-    tokenizer = AutoTokenizer.from_pretrained('NexaAIDev/Dolphin',
-    model = AutoModelForCausalLM.from_pretrained('NexaAIDev/Dolphin',
+    tokenizer = AutoTokenizer.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True).to("cuda:0")
+
+    # Run inference examples
+    mycontext = "Nexa AI is a Cupertino-based company founded in May 2023 that researches and develops models and tools for on-device AI applications. The company is founded by Alex and Zack. The company is known for its Octopus-series models, which rival large-scale language models in capabilities such as function-calling, multimodality, and action-planning, while remaining efficient and compact for edge device deployment. Nexa AI's mission is to advance on-device AI in collaboration with the global developer community. To this end, the company has created an on-device model hub for users to find, share, and collaborate on open-source AI models optimized for edge devices, as well as an SDK for developers to run and deploy AI models locally"
    inference_instruct(mycontext, "who founded Nexa AI?")
    inference_instruct(mycontext, "what is the mission of Nexa AI?")
    inference_instruct(mycontext, "what is the performance of Octopus V2 and V3?")
    inference_instruct(mycontext, "when is Nexa AI founded?")
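Both files in this commit now share the same memory-slot interface: the prompt reserves MEMORY_SIZE positions with the sentinel id -1, and the context is tokenized with `[memory_0]` … `[memory_31]` appended, which is presumably where the compressed context representation is injected. The following is a small sketch, assuming only that the published tokenizer recognizes the `[memory_i]` strings, of how one might inspect that interface before running the full model; it is an illustration, not part of the committed code.

```python
from transformers import AutoTokenizer

MEMORY_SIZE = 32
tokenizer = AutoTokenizer.from_pretrained("NexaAIDev/Dolphin", trust_remote_code=True)

# Context side: plain text followed by the 32 memory placeholder tokens
memory_suffix = "".join(f"[memory_{i}]" for i in range(MEMORY_SIZE))
suffix_ids = tokenizer(memory_suffix, add_special_tokens=False).input_ids
# If each placeholder maps to a single token, the suffix contributes exactly MEMORY_SIZE ids
print("placeholder ids:", suffix_ids[:4], "... count =", len(suffix_ids))

# Prompt side: -1 sentinels mark where the compressed context will be injected
chunks = " <context>who founded Nexa AI?".split("<context>")
prompt_ids = tokenizer(chunks[0]).input_ids + [-1] * MEMORY_SIZE + tokenizer(chunks[1]).input_ids
print("prompt length with memory sentinels:", len(prompt_ids))
```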