runningSnail committed on
Commit 6ad73d9
1 Parent(s): 34728e2

update modeling code

Files changed (2)
  1. README.md +58 -15
  2. modeling_dolphin.py +57 -10
README.md CHANGED
@@ -48,21 +48,64 @@ from transformers import AutoTokenizer
  from configuration_dolphin import DolphinForCausalLM
  import time

- tokenizer = AutoTokenizer.from_pretrained('nexa-collaboration/dolphin_instruct_1M_0805', trust_remote_code=True)
- model = DolphinForCausalLM.from_pretrained('nexa-collaboration/dolphin_instruct_1M_0805', trust_remote_code=True)
-
- def inference(input_text):
-     inputs = tokenizer(input_text, return_tensors="pt")
-     outputs = model.generate(**inputs, max_new_tokens=100)
-     return tokenizer.decode(outputs[0], skip_special_tokens=True)
-
- input_text = "Take a selfie for me with front camera"
- nexa_query = f"Below is the query from the users, please call the correct function and generate the parameters to call the function.\n\nQuery: {input_text} \n\nResponse:"
-
- start_time = time.time()
- result = inference(nexa_query)
- print("Dolphin model result:\n", result)
- print("Latency:", time.time() - start_time, "s")
+ AutoConfig.register("dolphin", DolphinConfig)
+ AutoModelForCausalLM.register(DolphinConfig, DolphinForCausalLM)
+
+ MEMORY_SIZE = 32
+
+ def inference_instruct(mycontext, question, device="cuda:0"):
+     import time
+     start = time.time()
+     generated_token_ids = []
+     prompt = f" <context>{question}"
+     print("input prompt: " + prompt)
+     print("input context: " + mycontext)
+     # Split the prompt at the <context> marker and tokenize each half.
+     text_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split("<context>")]
+     # Splice MEMORY_SIZE placeholder ids (-1) where the compressed context will be injected.
+     input_ids = (
+         torch.tensor(text_chunks[0] + [-1] * MEMORY_SIZE + text_chunks[1], dtype=torch.long)
+         .unsqueeze(0)
+         .to(device)
+     )
+     # Tokenize the context with the [memory_i] tokens appended.
+     context_tokenized = tokenizer(
+         mycontext + "".join([f"[memory_{i}]" for i in range(MEMORY_SIZE)]),
+         return_tensors="pt",
+     )
+     context_tokenized = {k: v.to(device) for k, v in context_tokenized.items()}
+     context_token_count = context_tokenized["input_ids"].shape[1] - MEMORY_SIZE
+     print("length of context: " + str(context_token_count) + " tokens")
+     # Greedy token-by-token decoding, capped at the context length.
+     for i in range(context_token_count):
+         print(f"\rGenerating token {i+1}/{context_token_count}", end="")
+         next_token = (
+             model(
+                 input_ids,
+                 context_input_ids=context_tokenized["input_ids"],
+                 context_attention_mask=context_tokenized["attention_mask"],
+             )
+             .logits[:, -1]
+             .argmax(-1)
+         )
+         if next_token.item() == 151643:  # Qwen2 <|endoftext|> id
+             break
+         generated_token_ids.append(next_token.item())
+         input_ids = torch.cat([input_ids, next_token.unsqueeze(1)], dim=-1)
+     print("\noutput: " + tokenizer.decode(generated_token_ids))
+     end = time.time()
+     print(f"Elapsed time: {end - start:.2f}s")
+
+
+ # Load the tokenizer and model
+ tokenizer = AutoTokenizer.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True)
+
+ # Run inference examples
+ mycontext = "Nexa AI is a Cupertino-based company founded in May 2023 that researches and develops models and tools for on-device AI applications. The company is founded by Alex and Zack. The company is known for its Octopus-series models, which rival large-scale language models in capabilities such as function-calling, multimodality, and action-planning, while remaining efficient and compact for edge device deployment. Nexa AI's mission is to advance on-device AI in collaboration with the global developer community. To this end, the company has created an on-device model hub for users to find, share, and collaborate on open-source AI models optimized for edge devices, as well as an SDK for developers to run and deploy AI models locally"
+ inference_instruct(mycontext, "who founded Nexa AI?")
+ inference_instruct(mycontext, "what is the mission of Nexa AI?")
+ inference_instruct(mycontext, "what is the performance of Octopus V2 and V3?")
+ inference_instruct(mycontext, "when is Nexa AI founded?")
  ```

  ## Training Process
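
The new README example revolves around MEMORY_SIZE placeholder slots: the prompt is split at the `<context>` marker, 32 dummy ids (-1) are spliced in where the compressed context will later be injected, and the context itself is tokenized with `[memory_0]`…`[memory_31]` appended. Below is a minimal sketch of that splicing step only; the token ids are made up and `build_prompt_ids` is a helper introduced here for illustration, not part of the repository.

```python
# Illustrative sketch only: invented token ids, no model or tokenizer required.
MEMORY_SIZE = 32

def build_prompt_ids(ids_before_context, ids_after_context, memory_size=MEMORY_SIZE):
    # The -1 slots mark where the model substitutes the compressed context
    # representation carried by the [memory_i] tokens.
    return ids_before_context + [-1] * memory_size + ids_after_context

ids = build_prompt_ids([101, 102], [2001, 2002, 2003])
assert ids.count(-1) == MEMORY_SIZE
print(len(ids))  # 5 prompt ids + 32 placeholders = 37
```
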
modeling_dolphin.py CHANGED
@@ -15,13 +15,14 @@ from transformers.modeling_attn_mask_utils import (
      AttentionMaskConverter,
  )
  from transformers.models.qwen2.modeling_qwen2 import Qwen2DecoderLayer
+ from transformers.models.qwen2.configuration_qwen2 import Qwen2Config
  import torch
  import torch.nn as nn
  from typing import List, Optional, Tuple, Union
  import warnings
  from dataclasses import dataclass
  from torch.nn import CrossEntropyLoss
- from .configuration_dolphin import encoder_config_dict, Qwen2Config
+ from .configuration_dolphin import encoder_config_dict, DolphinConfig

  CONTEXT_EMB = 896  # Qwen 0.7B has dimension of 896
  HIDDEN_EMB = 3584  # Qwen 7B has dimension of 3584
@@ -187,7 +188,7 @@ class DolphinModel(Qwen2PreTrainedModel):
      """
      config_class = DolphinConfig

-     def __init__(self, config: Qwen2Config):
+     def __init__(self, config: DolphinConfig):
          super().__init__(config)
          self.padding_idx = config.pad_token_id
          self.vocab_size = config.vocab_size
@@ -731,17 +732,63 @@ class DolphinForCausalLM(Qwen2PreTrainedModel):
              )
          return reordered_past

+ MEMORY_SIZE = 32
+
+ def inference_instruct(mycontext, question, device="cuda:0"):
+     import time
+     start = time.time()
+     generated_token_ids = []
+     prompt = f" <context>{question}"
+     print("input prompt: " + prompt)
+     print("input context: " + mycontext)
+     # Split the prompt at the <context> marker and tokenize each half.
+     text_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split("<context>")]
+     # Splice MEMORY_SIZE placeholder ids (-1) where the compressed context will be injected.
+     input_ids = (
+         torch.tensor(text_chunks[0] + [-1] * MEMORY_SIZE + text_chunks[1], dtype=torch.long)
+         .unsqueeze(0)
+         .to(device)
+     )
+     # Tokenize the context with the [memory_i] tokens appended.
+     context_tokenized = tokenizer(
+         mycontext + "".join([f"[memory_{i}]" for i in range(MEMORY_SIZE)]),
+         return_tensors="pt",
+     )
+     context_tokenized = {k: v.to(device) for k, v in context_tokenized.items()}
+     context_token_count = context_tokenized["input_ids"].shape[1] - MEMORY_SIZE
+     print("length of context: " + str(context_token_count) + " tokens")
+     # Greedy token-by-token decoding, capped at the context length.
+     for i in range(context_token_count):
+         print(f"\rGenerating token {i+1}/{context_token_count}", end="")
+         next_token = (
+             model(
+                 input_ids,
+                 context_input_ids=context_tokenized["input_ids"],
+                 context_attention_mask=context_tokenized["attention_mask"],
+             )
+             .logits[:, -1]
+             .argmax(-1)
+         )
+         if next_token.item() == 151643:  # Qwen2 <|endoftext|> id
+             break
+         generated_token_ids.append(next_token.item())
+         input_ids = torch.cat([input_ids, next_token.unsqueeze(1)], dim=-1)
+     print("\noutput: " + tokenizer.decode(generated_token_ids))
+     end = time.time()
+     print(f"Elapsed time: {end - start:.2f}s")
+
+
  if __name__ == "__main__":
      # Register your configuration and model
      AutoConfig.register("dolphin", DolphinConfig)
      AutoModelForCausalLM.register(DolphinConfig, DolphinForCausalLM)

-     # Ensure that your config is loaded with the correct model type
-     config = DolphinConfig(encoder_config=encoder_config_dict)
-
-     # Save the configuration (if needed)
-     config.save_pretrained("dolphin")
-
      # Load the tokenizer and model
-     tokenizer = AutoTokenizer.from_pretrained('NexaAIDev/Dolphin', trust_remote=True)
-     model = AutoModelForCausalLM.from_pretrained('NexaAIDev/Dolphin', config=config, trust_remote=True)
+     tokenizer = AutoTokenizer.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True)
+     model = AutoModelForCausalLM.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True)
+
+     # Run inference examples
+     mycontext = "Nexa AI is a Cupertino-based company founded in May 2023 that researches and develops models and tools for on-device AI applications. The company is founded by Alex and Zack. The company is known for its Octopus-series models, which rival large-scale language models in capabilities such as function-calling, multimodality, and action-planning, while remaining efficient and compact for edge device deployment. Nexa AI's mission is to advance on-device AI in collaboration with the global developer community. To this end, the company has created an on-device model hub for users to find, share, and collaborate on open-source AI models optimized for edge devices, as well as an SDK for developers to run and deploy AI models locally"
+     inference_instruct(mycontext, "who founded Nexa AI?")
+     inference_instruct(mycontext, "what is the mission of Nexa AI?")
+     inference_instruct(mycontext, "what is the performance of Octopus V2 and V3?")
+     inference_instruct(mycontext, "when is Nexa AI founded?")
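
Both copies of `inference_instruct` stop decoding when token id 151643 appears, which is the `<|endoftext|>` id in the Qwen2 tokenizer family. A small sketch (assuming the published tokenizer exposes that special token) showing how the stop id could be looked up instead of hard-coded:

```python
# Sketch, not part of the commit: resolve the stop-token id from the tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("NexaAIDev/Dolphin", trust_remote_code=True)
eos_id = tokenizer.convert_tokens_to_ids("<|endoftext|>")
print(eos_id)  # expected to be 151643 for a Qwen2-based tokenizer
```
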