jeffreymeetkai committed
Commit 2e54059
Parent(s): 7fae8ba

add response parsing code, update README

Files changed:
- README.md +38 -0
- config.json +4 -1
- modeling_functionary.py +126 -0
- tokenizer_config.json +1 -1
README.md CHANGED
@@ -19,6 +19,44 @@ The model determines when to execute functions, whether in parallel or serially,
 - Truly one of the best open-source alternatives to GPT-4
 - Support code interpreter
 
+## How to Get Started
+
+We provide custom code for parsing raw model responses into a JSON object containing role, content and tool_calls fields, so the model's function-calling output is easy to read.
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("meetkai/functionary-small-v2.5", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("meetkai/functionary-small-v2.5", device_map="auto", trust_remote_code=True)
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_current_weather",
+            "description": "Get the current weather",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA"
+                    }
+                },
+                "required": ["location"]
+            }
+        }
+    }
+]
+messages = [{"role": "user", "content": "What is the weather in Istanbul and Singapore respectively?"}]
+
+final_prompt = tokenizer.apply_chat_template(messages, tools, add_generation_prompt=True, tokenize=False)
+tokenizer.padding_side = "left"
+inputs = tokenizer(final_prompt, return_tensors="pt").to("cuda")
+pred = model.generate_tool_use(**inputs, max_new_tokens=128, tokenizer=tokenizer)
+print(tokenizer.decode(pred.cpu()[0]))
+```
+
 ## Prompt Template
 
 We convert function definitions to text similar to TypeScript definitions. Then we inject these definitions as system prompts. After that, we inject the default system prompt. Then we start the conversation messages.
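The "Prompt Template" section above describes how the tool definitions and system prompts are placed before the conversation. A quick way to see the exact prompt the model receives is to print the string built by apply_chat_template; this is a minimal sketch that assumes `tokenizer`, `tools`, and `messages` from the README example above are in scope.

```python
# Sketch: inspect the prompt built for the README example above
# (assumes `tokenizer`, `tools`, and `messages` from that snippet are in scope).
prompt = tokenizer.apply_chat_template(messages, tools, add_generation_prompt=True, tokenize=False)
print(prompt)  # shows the injected definitions and system prompt described above
```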
config.json CHANGED
@@ -1,8 +1,11 @@
 {
   "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
   "architectures": [
-    "LlamaForCausalLM"
+    "FunctionaryForCausalLM"
   ],
+  "auto_map": {
+    "AutoModelForCausalLM": "modeling_functionary.FunctionaryForCausalLM"
+  },
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
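The new auto_map entry is what lets AutoModelForCausalLM resolve to the custom class shipped in modeling_functionary.py when the repo is loaded with trust_remote_code=True. A minimal sketch of that effect (it downloads and runs code from the repo, so it assumes network access and trust in the source):

```python
# Sketch: with "auto_map" in config.json, trust_remote_code=True loads the custom class
# defined in modeling_functionary.py instead of the stock LlamaForCausalLM.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "meetkai/functionary-small-v2.5", device_map="auto", trust_remote_code=True
)
print(type(model).__name__)  # expected: FunctionaryForCausalLM
```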
modeling_functionary.py ADDED
@@ -0,0 +1,126 @@
+# coding=utf-8
+# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch LLaMA model."""
+
+import json
+import re
+from typing import TYPE_CHECKING, Callable, List, Optional, Tuple, Union
+
+import torch
+import torch.utils.checkpoint
+
+from transformers.generation.configuration_utils import GenerationConfig
+from transformers.generation.logits_process import LogitsProcessorList
+from transformers.generation.stopping_criteria import StoppingCriteriaList
+from transformers.generation.utils import (
+    GenerateBeamDecoderOnlyOutput,
+    GenerateBeamEncoderDecoderOutput,
+    GenerateDecoderOnlyOutput,
+    GenerateEncoderDecoderOutput
+)
+from transformers.models.llama.modeling_llama import LlamaForCausalLM
+from transformers.utils import logging
+
+
+if TYPE_CHECKING:
+    from transformers.modeling_utils import PreTrainedModel
+    from transformers.generation.streamers import BaseStreamer
+
+logger = logging.get_logger(__name__)
+
+GenerateNonBeamOutput = Union[GenerateDecoderOnlyOutput, GenerateEncoderDecoderOutput]
+GenerateBeamOutput = Union[GenerateBeamDecoderOnlyOutput, GenerateBeamEncoderDecoderOutput]
+GenerateOutput = Union[GenerateNonBeamOutput, GenerateBeamOutput]
+
+
+class FunctionaryForCausalLM(LlamaForCausalLM):
+
+    def generate_tool_use(
+        self,
+        inputs: Optional[torch.Tensor] = None,
+        generation_config: Optional[GenerationConfig] = None,
+        logits_processor: Optional[LogitsProcessorList] = None,
+        stopping_criteria: Optional[StoppingCriteriaList] = None,
+        prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
+        synced_gpus: Optional[bool] = None,
+        assistant_model: Optional["PreTrainedModel"] = None,
+        streamer: Optional["BaseStreamer"] = None,
+        negative_prompt_ids: Optional[torch.Tensor] = None,
+        negative_prompt_attention_mask: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[GenerateOutput, torch.LongTensor]:
+
+        results = self.generate(
+            inputs=inputs,
+            generation_config=generation_config,
+            logits_processor=logits_processor,
+            stopping_criteria=stopping_criteria,
+            prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
+            synced_gpus=synced_gpus,
+            assistant_model=assistant_model,
+            streamer=streamer,
+            negative_prompt_ids=negative_prompt_ids,
+            negative_prompt_attention_mask=negative_prompt_attention_mask,
+            **kwargs,
+        )
+
+        tokenizer = kwargs.pop("tokenizer", None)  # Pull this out first, we use it to parse raw output
+        input_ids = kwargs.pop("input_ids")
+        function_call_token = "<|reserved_special_token_249|>"
+
+        correct_results = []
+        for input_id, result in zip(input_ids, results):
+            final_output_json = {"role": "assistant", "content": None, "tool_calls": None}
+            tool_calls = []
+            raw_output_str = tokenizer.decode(result[len(input_id):].cpu())
+            has_text = False if raw_output_str.startswith(function_call_token) else True
+            chunks = raw_output_str.split(function_call_token)
+            for i, chunk in enumerate(chunks):
+                if len(chunk) == 0:
+                    continue
+
+                chunk = chunk.replace(tokenizer.pad_token, "")
+                if i == 0 and has_text is not False:
+                    final_output_json["content"] = chunk[: -len("<|eot_id|>")] if chunk.endswith("<|eot_id|>") else chunk
+                else:
+                    tool_calls.append(
+                        {
+                            "name": chunk[: chunk.index("\n{")],
+                            "arguments": chunk[chunk.index("\n{") + 1: -len("<|eot_id|>")] if chunk.endswith("<|eot_id|>") else chunk[chunk.index("\n{") + 1:]
+                        }
+                    )
+            if len(tool_calls) > 0:
+                final_output_json["tool_calls"] = tool_calls
+            final_output_str = json.dumps(final_output_json, indent=4)
+            final_output_ids = tokenizer(final_output_str, add_special_tokens=False)["input_ids"]
+            correct_results.append(
+                torch.cat(
+                    (result[:len(input_id)].cpu(), torch.tensor(final_output_ids))
+                )
+            )
+        max_len = max([tensor.shape[0] for tensor in correct_results])
+        correct_results = [
+            torch.nn.functional.pad(
+                correct_result, (0, max_len - correct_result.shape[0]), value=tokenizer.eos_token_id
+            ) for correct_result in correct_results
+        ]
+        correct_results = torch.stack(correct_results)
+
+        return correct_results
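generate_tool_use returns the prompt tokens followed by the re-encoded JSON assistant message (padded with eos tokens across the batch), so the generated portion can be decoded and parsed directly. This is a minimal sketch of reading that output, assuming `pred`, `inputs`, and `tokenizer` come from the README example above.

```python
# Sketch: read the JSON message appended by generate_tool_use
# (assumes `pred`, `inputs`, and `tokenizer` from the README example above).
import json

prompt_len = inputs["input_ids"].shape[1]
# Decode only the generated part; skip_special_tokens drops the eos padding.
raw_json = tokenizer.decode(pred[0][prompt_len:], skip_special_tokens=True)

message = json.loads(raw_json)  # {"role": "assistant", "content": ..., "tool_calls": [...]}
for call in message.get("tool_calls") or []:
    print(call["name"], json.loads(call["arguments"]))
```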
tokenizer_config.json CHANGED
@@ -2050,7 +2050,7 @@
     }
   },
   "bos_token": "<|begin_of_text|>",
-  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + 'name=' + message['name'] + '\n' + message['content'] + '<|eot_id|>' }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '<|reserved_special_token_249|>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ '<|eot_id|>' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>
+  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + 'name=' + message['name'] + '\n' + message['content'] + '<|eot_id|>' }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '<|reserved_special_token_249|>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ '<|eot_id|>' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|end_of_text|>",
   "legacy": true,
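The updated chat_template ends with the assistant generation header; the earlier parts render tool calls with the <|reserved_special_token_249|> marker and tool results with a name= line. A minimal sketch of rendering a hypothetical tool-calling exchange with this template (the message contents below are made up for illustration):

```python
# Sketch: render a hypothetical tool-calling exchange with the chat template above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meetkai/functionary-small-v2.5", trust_remote_code=True)
messages = [
    {"role": "user", "content": "What is the weather in Istanbul?"},
    {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {"function": {"name": "get_current_weather",
                          "arguments": '{"location": "Istanbul"}'}}  # illustrative values
        ],
    },
    {"role": "tool", "name": "get_current_weather", "content": '{"temperature": 30}'},  # illustrative values
]
print(tokenizer.apply_chat_template(messages, tokenize=False))
# The assistant turn renders as <|reserved_special_token_249|> + function name + "\n" + arguments,
# and the tool turn as its header followed by "name=get_current_weather" and the tool content.
```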