suriya7 commited on
Commit
5b46725
1 Parent(s): ecd27a8

Delete custom_tokenizer.py

Browse files
Files changed (1) hide show
  1. custom_tokenizer.py +0 -40
custom_tokenizer.py DELETED
@@ -1,40 +0,0 @@
1
- from transformers import GPT2Tokenizer
2
-
3
class CustomGPT2Tokenizer(GPT2Tokenizer):
    """GPT2Tokenizer that ships a ChatML-style chat template.

    Each message is wrapped in ``<|im_start|>``/``<|im_end|>`` markers, and a
    fixed "Securitron" system prompt is prepended when the conversation does
    not already open with a system message. The Jinja template assigned in
    ``__init__`` and the manual formatter ``apply_chat_template`` implement
    the same layout.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # ChatML-style Jinja template consumed by transformers' chat-template
        # machinery. Kept byte-identical to the original definition.
        self.chat_template = "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named Securitron, trained by Aquilax.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"

    def apply_chat_template(self, messages, add_system_prompt=True, add_generation_prompt=True):
        """
        Applies the chat template to the provided messages. Optionally adds the system prompt.

        Args:
            messages (list): List of message dictionaries with 'role' and 'content'.
            add_system_prompt (bool): If True, adds the system prompt at the beginning.
            add_generation_prompt (bool): If True, adds a prompt for generation.

        Returns:
            str: Formatted text with the chat template applied.
        """
        formatted_messages = []
        for message in messages:
            role = message.get('role', '')
            content = message.get('content', '')
            # Messages missing a role or content are silently skipped,
            # matching the Jinja template's expectation of complete entries.
            if role and content:
                formatted_messages.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")

        # Prepend the fixed system prompt unless the conversation already
        # starts with one. Guard against an empty `messages` list, which
        # previously raised IndexError on messages[0].
        if add_system_prompt and (not messages or messages[0].get('role') != 'system'):
            formatted_messages.insert(0, "<|im_start|>system\nYou are a helpful AI assistant named Securitron, trained by Aquilax.<|im_end|>\n")

        # BUGFIX: the generation prompt was documented and accepted but never
        # emitted; append it to match the Jinja template's behavior.
        if add_generation_prompt:
            formatted_messages.append("<|im_start|>assistant\n")

        return "".join(formatted_messages)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
        """Load a pretrained tokenizer and rebind it to this subclass.

        The parent loader returns a plain GPT2Tokenizer; reassigning
        ``__class__`` makes the custom chat-template methods available
        without re-tokenizing or copying state.
        """
        tokenizer = super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
        tokenizer.__class__ = cls
        return tokenizer