runningSnail committed
Commit a33ba53
1 Parent(s): 488b987

update example

Files changed (1)
  1. README.md +29 -30
README.md CHANGED
@@ -48,36 +48,29 @@ from transformers import AutoTokenizer
 from configuration_dolphin import DolphinForCausalLM
 import time
 
-AutoConfig.register("dolphin", DolphinConfig)
-AutoModelForCausalLM.register(DolphinConfig, DolphinForCausalLM)
-
-MEMORY_SIZE = 32
-def inference_instruct(mycontext, device = "cuda:0"):
+def inference_instruct(mycontext, question, device="cuda:0"):
     import time
-    start = time.time()
+    MEMORY_SIZE = 32
+    start_time = time.time()
     generated_token_ids = []
-    prompt = " <context>Who and when founded the Shanda group?"
-    print("input prompt: " + prompt)
-    print("input context: " + mycontext)
+    prompt = f" <context>{question}"
     text_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split("<context>")]
     input_ids = (
-        torch.tensor(text_chunks[0] + [-1] * MEMORY_SIZE + text_chunks[1], dtype=torch.long)
+        torch.tensor(
+            text_chunks[0] + [-1] * MEMORY_SIZE + text_chunks[1], dtype=torch.long
+        )
         .unsqueeze(0)
         .to(device)
     )
-    # print(input_ids)
     # to process the context
     context_tokenized = tokenizer(
         mycontext + "".join([f"[memory_{i}]" for i in range(MEMORY_SIZE)]),
         return_tensors="pt",
     )
     context_tokenized = {k: v.to(device) for k, v in context_tokenized.items()}
-    # print(context_tokenized["input_ids"])
-    context_token_count = (context_tokenized["input_ids"]).shape[1] - MEMORY_SIZE
-    print("length of context: " + str(context_token_count) + " tokens")
+    context_token_count = (context_tokenized["input_ids"]).shape[1] - MEMORY_SIZE
     # We conduct a inference process
     for i in range(context_token_count):
-        print(f"\rGenerating token {i+1}/{context_token_count}", end="")
         next_token = (
             model(
                 input_ids,
@@ -91,21 +84,27 @@ def inference_instruct(mycontext, device = "cuda:0"):
             break
         generated_token_ids.append(next_token.item())
         input_ids = torch.cat([input_ids, next_token.unsqueeze(1)], dim=-1)
-    print("\noutput: " + tokenizer.decode(generated_token_ids))
-    end = time.time()
-    print(f"Elapsed time: {end - start:.2f}s")
-
-
-# Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True)
-
-# Run inference example
-mycontext = "Nexa AI is a Cupertino-based company founded in May 2023 that researches and develops models and tools for on-device AI applications. The company is founded by Alex and Zack. The company is known for its Octopus-series models, which rival large-scale language models in capabilities such as function-calling, multimodality, and action-planning, while remaining efficient and compact for edge device deployment. Nexa AI's mission is to advance on-device AI in collaboration with the global developer community. To this end, the company has created an on-device model hub for users to find, share, and collaborate on open-source AI models optimized for edge devices, as well as an SDK for developers to run and deploy AI models locally"
-inference_instruct(mycontext, "who founded Nexa AI?")
-inference_instruct(mycontext, "what is the mission of Nexa AI?")
-inference_instruct(mycontext, "what is the performance of Octopus V2 and V3?")
-inference_instruct(mycontext, "when is Nexa AI founded?")
+    result = tokenizer.decode(generated_token_ids)
+    print(f"Time taken: {time.time() - start_time}")
+    return result
+
+
+if __name__ == "__main__":
+    # Register your configuration and model
+    AutoConfig.register("dolphin", DolphinConfig)
+    AutoModelForCausalLM.register(DolphinConfig, DolphinForCausalLM)
+    device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+    # Load the tokenizer and model
+    tokenizer = AutoTokenizer.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True, torch_dtype=torch.bfloat16, device_map=device_name)
+
+    # Run inference example
+    mycontext = "Nexa AI is a Cupertino-based company founded in May 2023 that researches and develops models and tools for on-device AI applications. The company is founded by Alex and Zack. The company is known for its Octopus-series models, which rival large-scale language models in capabilities such as function-calling, multimodality, and action-planning, while remaining efficient and compact for edge device deployment. Nexa AI's mission is to advance on-device AI in collaboration with the global developer community. To this end, the company has created an on-device model hub for users to find, share, and collaborate on open-source AI models optimized for edge devices, as well as an SDK for developers to run and deploy AI models locally"
+    question = "Who founded Nexa AI?"
+    # Pass the context and the correct device string
+    result = inference_instruct(mycontext, question, device=device_name)
+    print("Result:", result)
 ```
 
 ## Training Process
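The one structural idea in this example, unchanged by the commit, is how the model input is assembled: the prompt is split on the `<context>` tag, and `MEMORY_SIZE` placeholder ids (`-1`) are spliced in where the compressed context will be attended to, while the context itself is tokenized separately with `[memory_0]` through `[memory_31]` suffix tokens. A minimal sketch of that layout, using a fake per-character tokenizer so it runs without loading Dolphin (the `fake_ids` helper is illustrative only, not part of the repository):

```python
# Illustration of the input layout built by inference_instruct above.
# The real example uses the Dolphin tokenizer; ids here are faked.
MEMORY_SIZE = 32  # same constant as in the example

def fake_ids(text):
    # Stand-in tokenizer: one id per character, purely illustrative.
    return [ord(c) for c in text]

prompt = " <context>Who founded Nexa AI?"
prefix, question = prompt.split("<context>")

# The -1 placeholders mark the slots the model fills from the context,
# which is tokenized separately with [memory_0]..[memory_31] appended.
input_ids = fake_ids(prefix) + [-1] * MEMORY_SIZE + fake_ids(question)
assert input_ids.count(-1) == MEMORY_SIZE
```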
 
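The previous revision ran four questions at module level (and, with the old signature, passed each question string into the `device` parameter). With the refactored function, the same demo becomes a loop over `inference_instruct` calls. A minimal sketch, assuming the `__main__` setup above (registration, `tokenizer`, `model`, `mycontext`, `device_name`) has already run:

```python
# Multi-question demo mirroring the previous revision's example,
# assuming tokenizer, model, mycontext, and device_name exist as above.
questions = [
    "who founded Nexa AI?",
    "what is the mission of Nexa AI?",
    "what is the performance of Octopus V2 and V3?",
    "when is Nexa AI founded?",
]
for question in questions:
    result = inference_instruct(mycontext, question, device=device_name)
    print(f"Q: {question}\nA: {result}")
```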