samleeasus committed on
Commit
43a614c
1 Parent(s): adf9138

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -1,4 +1,10 @@
1
  {
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
@@ -20,29 +26,8 @@
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
- "special_output_token": {
24
- "content": "<|spcout|>",
25
- "lstrip": false,
26
- "normalized": true,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- "sep_token": {
31
- "content": "<|sep|>",
32
- "lstrip": false,
33
- "normalized": true,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "eot_token": {
38
- "content": "<|eot|>",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- "output_token": {
45
- "content": "<|output|>",
46
  "lstrip": false,
47
  "normalized": true,
48
  "rstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|spcout|>",
4
+ "<|sep|>",
5
+ "<|eot|>",
6
+ "<|output|>"
7
+ ],
8
  "bos_token": {
9
  "content": "<s>",
10
  "lstrip": false,
 
26
  "rstrip": false,
27
  "single_word": false
28
  },
29
+ "unk_token": {
30
+ "content": "<unk>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  "lstrip": false,
32
  "normalized": true,
33
  "rstrip": false,
tokenizer.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea9e8b933076f55f821e6a823c7bee1c202d1d31722147d622170e27224ee69f
3
  size 787400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc65134e593a8b5aaac1a9b1154165c4d25d896a8b8c26b5bb4faa445715fe2c
3
  size 787400
tokenizer_config.json CHANGED
@@ -1,6 +1,12 @@
1
  {
2
  "add_bos_token": false,
3
  "add_eos_token": true,
 
 
 
 
 
 
4
  "bos_token": {
5
  "__type": "AddedToken",
6
  "content": "<s>",
 
1
  {
2
  "add_bos_token": false,
3
  "add_eos_token": true,
4
+ "additional_special_tokens": [
5
+ "<|spcout|>",
6
+ "<|sep|>",
7
+ "<|eot|>",
8
+ "<|output|>"
9
+ ],
10
  "bos_token": {
11
  "__type": "AddedToken",
12
  "content": "<s>",