dgnk007 committed on
Commit
8198ffe
1 Parent(s): f36420c

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +4 -0
  2. special_tokens_map.json +4 -4
  3. tokenizer.json +27 -0
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<pad>": 50258,
3
+ "<s>": 50257
4
+ }
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|endoftext|>",
5
- "unk_token": "<|endoftext|>"
6
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "eos_token": "###",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<pad>"
6
  }
tokenizer.json CHANGED
@@ -3,6 +3,15 @@
3
  "truncation": null,
4
  "padding": null,
5
  "added_tokens": [
 
 
 
 
 
 
 
 
 
6
  {
7
  "id": 50256,
8
  "content": "<|endoftext|>",
@@ -11,6 +20,24 @@
11
  "rstrip": false,
12
  "normalized": false,
13
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  }
15
  ],
16
  "normalizer": null,
 
3
  "truncation": null,
4
  "padding": null,
5
  "added_tokens": [
6
+ {
7
+ "id": 21017,
8
+ "content": "###",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
  {
16
  "id": 50256,
17
  "content": "<|endoftext|>",
 
20
  "rstrip": false,
21
  "normalized": false,
22
  "special": true
23
+ },
24
+ {
25
+ "id": 50257,
26
+ "content": "<s>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 50258,
35
+ "content": "<pad>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
  }
42
  ],
43
  "normalizer": null,