Locutusque commited on
Commit
c8ef202
1 Parent(s): fa23fe6

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +0 -0
  2. tokenizer_config.json +4 -0
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,4 +1,7 @@
1
  {
 
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
@@ -70,6 +73,7 @@
70
  "<|USER|>"
71
  ],
72
  "bos_token": "<|bos|>",
 
73
  "clean_up_tokenization_spaces": false,
74
  "eos_token": "<|endoftext|>",
75
  "legacy": true,
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
73
  "<|USER|>"
74
  ],
75
  "bos_token": "<|bos|>",
76
+ "chat_template": "{%- set ns = namespace(found=false) -%}{%- for message in messages -%}{%- if message['role'] == 'system' -%}{%- set ns.found = true -%}{%- endif -%}{%- endfor -%}{%- for message in messages %}{%- if message['role'] == 'system' -%}{{- '<|im_start|>system\n' + message['content'].rstrip() + '<|im_end|>\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'<|im_start|>user\n' + message['content'].rstrip() + '<|im_end|>\n'-}}{%- else -%}{{-'<|im_start|>assistant\n' + message['content'] + '<|im_end|>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'<|im_start|>assistant\n'-}}{%- endif -%}",
77
  "clean_up_tokenization_spaces": false,
78
  "eos_token": "<|endoftext|>",
79
  "legacy": true,