jeffreymeetkai committed
Commit 2e54059
Parent(s): 7fae8ba

add response parsing code, update README

Files changed:
- README.md +38 -0
- config.json +4 -1
- modeling_functionary.py +126 -0
- tokenizer_config.json +1 -1
README.md CHANGED
@@ -19,6 +19,44 @@ The model determines when to execute functions, whether in parallel or serially,
 - Truly one of the best open-source alternatives to GPT-4
 - Support code interpreter
 
+## How to Get Started
+
+We provide custom code for parsing raw model responses into a JSON object containing role, content and tool_calls fields, so the model's function-calling output is easy to read.
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("meetkai/functionary-small-v2.5", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("meetkai/functionary-small-v2.5", device_map="auto", trust_remote_code=True)
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_current_weather",
+            "description": "Get the current weather",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA"
+                    }
+                },
+                "required": ["location"]
+            }
+        }
+    }
+]
+messages = [{"role": "user", "content": "What is the weather in Istanbul and Singapore respectively?"}]
+
+final_prompt = tokenizer.apply_chat_template(messages, tools, add_generation_prompt=True, tokenize=False)
+tokenizer.padding_side = "left"
+inputs = tokenizer(final_prompt, return_tensors="pt").to("cuda")
+pred = model.generate_tool_use(**inputs, max_new_tokens=128, tokenizer=tokenizer)
+print(tokenizer.decode(pred.cpu()[0]))
+```
+
 ## Prompt Template
 
 We convert function definitions to text similar to TypeScript definitions. Then we inject these definitions as system prompts. After that, we inject the default system prompt. Then we start the conversation messages.
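The "Prompt Template" section above describes how the tool definitions and system prompts are placed before the conversation. A quick way to see the exact prompt the model receives is to print the string built by apply_chat_template; this is a minimal sketch that assumes `tokenizer`, `tools`, and `messages` from the README example above are in scope.

```python
# Sketch: inspect the prompt built for the README example above
# (assumes `tokenizer`, `tools`, and `messages` from that snippet are in scope).
prompt = tokenizer.apply_chat_template(messages, tools, add_generation_prompt=True, tokenize=False)
print(prompt)  # shows the injected definitions and system prompt described above
```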
config.json CHANGED
@@ -1,8 +1,11 @@
 {
   "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
   "architectures": [
-    "LlamaForCausalLM"
+    "FunctionaryForCausalLM"
   ],
+  "auto_map": {
+    "AutoModelForCausalLM": "modeling_functionary.FunctionaryForCausalLM"
+  },
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
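The new auto_map entry is what lets AutoModelForCausalLM resolve to the custom class shipped in modeling_functionary.py when the repo is loaded with trust_remote_code=True. A minimal sketch of that effect (it downloads and runs code from the repo, so it assumes network access and trust in the source):

```python
# Sketch: with "auto_map" in config.json, trust_remote_code=True loads the custom class
# defined in modeling_functionary.py instead of the stock LlamaForCausalLM.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "meetkai/functionary-small-v2.5", device_map="auto", trust_remote_code=True
)
print(type(model).__name__)  # expected: FunctionaryForCausalLM
```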
modeling_functionary.py ADDED
@@ -0,0 +1,126 @@
+# coding=utf-8
+# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch LLaMA model."""
+
+import json
+import re
+from typing import TYPE_CHECKING, Callable, List, Optional, Tuple, Union
+
+import torch
+import torch.utils.checkpoint
+
+from transformers.generation.configuration_utils import GenerationConfig
+from transformers.generation.logits_process import LogitsProcessorList
+from transformers.generation.stopping_criteria import StoppingCriteriaList
+from transformers.generation.utils import (
+    GenerateBeamDecoderOnlyOutput,
+    GenerateBeamEncoderDecoderOutput,
+    GenerateDecoderOnlyOutput,
+    GenerateEncoderDecoderOutput
+)
+from transformers.models.llama.modeling_llama import LlamaForCausalLM
+from transformers.utils import logging
+
+
+if TYPE_CHECKING:
+    from transformers.modeling_utils import PreTrainedModel
+    from transformers.generation.streamers import BaseStreamer
+
+logger = logging.get_logger(__name__)
+
+GenerateNonBeamOutput = Union[GenerateDecoderOnlyOutput, GenerateEncoderDecoderOutput]
+GenerateBeamOutput = Union[GenerateBeamDecoderOnlyOutput, GenerateBeamEncoderDecoderOutput]
+GenerateOutput = Union[GenerateNonBeamOutput, GenerateBeamOutput]
+
+
+class FunctionaryForCausalLM(LlamaForCausalLM):
+
+    def generate_tool_use(
+        self,
+        inputs: Optional[torch.Tensor] = None,
+        generation_config: Optional[GenerationConfig] = None,
+        logits_processor: Optional[LogitsProcessorList] = None,
+        stopping_criteria: Optional[StoppingCriteriaList] = None,
+        prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
+        synced_gpus: Optional[bool] = None,
+        assistant_model: Optional["PreTrainedModel"] = None,
+        streamer: Optional["BaseStreamer"] = None,
+        negative_prompt_ids: Optional[torch.Tensor] = None,
+        negative_prompt_attention_mask: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[GenerateOutput, torch.LongTensor]:
+
+        results = self.generate(
+            inputs=inputs,
+            generation_config=generation_config,
+            logits_processor=logits_processor,
+            stopping_criteria=stopping_criteria,
+            prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
+            synced_gpus=synced_gpus,
+            assistant_model=assistant_model,
+            streamer=streamer,
+            negative_prompt_ids=negative_prompt_ids,
+            negative_prompt_attention_mask=negative_prompt_attention_mask,
+            **kwargs,
+        )
+
+        tokenizer = kwargs.pop("tokenizer", None)  # Pull this out first, we use it to parse raw output
+        input_ids = kwargs.pop("input_ids")
+        function_call_token = "<|reserved_special_token_249|>"
+
+        correct_results = []
+        for input_id, result in zip(input_ids, results):
+            final_output_json = {"role": "assistant", "content": None, "tool_calls": None}
+            tool_calls = []
+            raw_output_str = tokenizer.decode(result[len(input_id):].cpu())
+            has_text = False if raw_output_str.startswith(function_call_token) else True
+            chunks = raw_output_str.split(function_call_token)
+            for i, chunk in enumerate(chunks):
+                if len(chunk) == 0:
+                    continue
+
+                chunk = chunk.replace(tokenizer.pad_token, "")
+                if i == 0 and has_text is not False:
+                    final_output_json["content"] = chunk[: -len("<|eot_id|>")] if chunk.endswith("<|eot_id|>") else chunk
+                else:
+                    tool_calls.append(
+                        {
+                            "name": chunk[: chunk.index("\n{")],
+                            "arguments": chunk[chunk.index("\n{") + 1: -len("<|eot_id|>")] if chunk.endswith("<|eot_id|>") else chunk[chunk.index("\n{") + 1:]
+                        }
+                    )
+            if len(tool_calls) > 0:
+                final_output_json["tool_calls"] = tool_calls
+            final_output_str = json.dumps(final_output_json, indent=4)
+            final_output_ids = tokenizer(final_output_str, add_special_tokens=False)["input_ids"]
+            correct_results.append(
+                torch.cat(
+                    (result[:len(input_id)].cpu(), torch.tensor(final_output_ids))
+                )
+            )
+        max_len = max([tensor.shape[0] for tensor in correct_results])
+        correct_results = [
+            torch.nn.functional.pad(
+                correct_result, (0, max_len - correct_result.shape[0]), value=tokenizer.eos_token_id
+            ) for correct_result in correct_results
+        ]
+        correct_results = torch.stack(correct_results)
+
+        return correct_results
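generate_tool_use returns the prompt tokens followed by the re-encoded JSON assistant message (padded with eos tokens across the batch), so the generated portion can be decoded and parsed directly. This is a minimal sketch of reading that output, assuming `pred`, `inputs`, and `tokenizer` come from the README example above.

```python
# Sketch: read the JSON message appended by generate_tool_use
# (assumes `pred`, `inputs`, and `tokenizer` from the README example above).
import json

prompt_len = inputs["input_ids"].shape[1]
# Decode only the generated part; skip_special_tokens drops the eos padding.
raw_json = tokenizer.decode(pred[0][prompt_len:], skip_special_tokens=True)

message = json.loads(raw_json)  # {"role": "assistant", "content": ..., "tool_calls": [...]}
for call in message.get("tool_calls") or []:
    print(call["name"], json.loads(call["arguments"]))
```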
tokenizer_config.json CHANGED
@@ -2050,7 +2050,7 @@
     }
   },
   "bos_token": "<|begin_of_text|>",
-  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + 'name=' + message['name'] + '\n' + message['content'] + '<|eot_id|>' }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '<|reserved_special_token_249|>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ '<|eot_id|>' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>
+  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + 'name=' + message['name'] + '\n' + message['content'] + '<|eot_id|>' }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '<|reserved_special_token_249|>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ '<|eot_id|>' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|end_of_text|>",
   "legacy": true,
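The updated chat_template ends with the assistant generation header; the earlier parts render tool calls with the <|reserved_special_token_249|> marker and tool results with a name= line. A minimal sketch of rendering a hypothetical tool-calling exchange with this template (the message contents below are made up for illustration):

```python
# Sketch: render a hypothetical tool-calling exchange with the chat template above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meetkai/functionary-small-v2.5", trust_remote_code=True)
messages = [
    {"role": "user", "content": "What is the weather in Istanbul?"},
    {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {"function": {"name": "get_current_weather",
                          "arguments": '{"location": "Istanbul"}'}}  # illustrative values
        ],
    },
    {"role": "tool", "name": "get_current_weather", "content": '{"temperature": 30}'},  # illustrative values
]
print(tokenizer.apply_chat_template(messages, tokenize=False))
# The assistant turn renders as <|reserved_special_token_249|> + function name + "\n" + arguments,
# and the tool turn as its header followed by "name=get_current_weather" and the tool content.
```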