runningSnail committed · Commit a33ba53 · 1 parent: 488b987
update example

README.md CHANGED
@@ -48,36 +48,29 @@ from transformers import AutoTokenizer
 from configuration_dolphin import DolphinForCausalLM
 import time
 
-
-AutoModelForCausalLM.register(DolphinConfig, DolphinForCausalLM)
-
-MEMORY_SIZE = 32
-def inference_instruct(mycontext, device = "cuda:0"):
+def inference_instruct(mycontext, question, device="cuda:0"):
     import time
-
+    MEMORY_SIZE = 32
+    start_time = time.time()
     generated_token_ids = []
-    prompt = " <context>
-    print("input prompt: " + prompt)
-    print("input context: " + mycontext)
+    prompt = f" <context>{question}"
     text_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split("<context>")]
     input_ids = (
-        torch.tensor(
+        torch.tensor(
+            text_chunks[0] + [-1] * MEMORY_SIZE + text_chunks[1], dtype=torch.long
+        )
         .unsqueeze(0)
         .to(device)
     )
-    # print(input_ids)
     # to process the context
     context_tokenized = tokenizer(
         mycontext + "".join([f"[memory_{i}]" for i in range(MEMORY_SIZE)]),
         return_tensors="pt",
     )
     context_tokenized = {k: v.to(device) for k, v in context_tokenized.items()}
-
-    context_token_count = (context_tokenized["input_ids"]).shape[1] - MEMORY_SIZE
-    print("length of context: " + str(context_token_count) + " tokens")
+    context_token_count = (context_tokenized["input_ids"]).shape[1] - MEMORY_SIZE
     # We conduct a inference process
     for i in range(context_token_count):
-        print(f"\rGenerating token {i+1}/{context_token_count}", end="")
         next_token = (
             model(
                 input_ids,
@@ -91,21 +84,27 @@ def inference_instruct(mycontext, device = "cuda:0"):
             break
         generated_token_ids.append(next_token.item())
         input_ids = torch.cat([input_ids, next_token.unsqueeze(1)], dim=-1)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    result = tokenizer.decode(generated_token_ids)
+    print(f"Time taken: {time.time() - start_time}")
+    return result
+
+
+if __name__ == "__main__":
+    # Register your configuration and model
+    AutoConfig.register("dolphin", DolphinConfig)
+    AutoModelForCausalLM.register(DolphinConfig, DolphinForCausalLM)
+    device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+    # Load the tokenizer and model
+    tokenizer = AutoTokenizer.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True, torch_dtype=torch.bfloat16, device_map=device_name)
+
+    # Run inference example
+    mycontext = "Nexa AI is a Cupertino-based company founded in May 2023 that researches and develops models and tools for on-device AI applications. The company is founded by Alex and Zack. The company is known for its Octopus-series models, which rival large-scale language models in capabilities such as function-calling, multimodality, and action-planning, while remaining efficient and compact for edge device deployment. Nexa AI's mission is to advance on-device AI in collaboration with the global developer community. To this end, the company has created an on-device model hub for users to find, share, and collaborate on open-source AI models optimized for edge devices, as well as an SDK for developers to run and deploy AI models locally"
+    question = "Who founded Nexa AI?"
+    # Pass the context and the correct device string
+    result = inference_instruct(mycontext, question, device=device_name)
+    print("Result:", result)
 ```
 
 ## Training Process
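
Since the diff only shows the changed hunks, the snippet below is a minimal, self-contained sketch of the registration-and-loading pattern the updated `__main__` block relies on. It is not part of this commit: the `torch`/`transformers` imports and the `DolphinConfig` import from `configuration_dolphin.py` (shipped alongside the NexaAIDev/Dolphin repository) are assumptions based on the surrounding README, and the memory-token generation loop is omitted because its unchanged lines are not visible in this diff.

```python
# Sketch only (not from this commit): register the custom Dolphin classes with
# transformers' Auto* factories and load the checkpoint, as the updated example does.
# Assumes configuration_dolphin.py from the NexaAIDev/Dolphin repo is importable
# and exports both DolphinConfig and DolphinForCausalLM.
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from configuration_dolphin import DolphinConfig, DolphinForCausalLM

# Map the "dolphin" model type to the custom config/model classes
AutoConfig.register("dolphin", DolphinConfig)
AutoModelForCausalLM.register(DolphinConfig, DolphinForCausalLM)

device_name = "cuda:0" if torch.cuda.is_available() else "cpu"

# trust_remote_code lets transformers resolve the custom code in the model repo
tokenizer = AutoTokenizer.from_pretrained("NexaAIDev/Dolphin", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "NexaAIDev/Dolphin",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map=device_name,
)
```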