hamishivi committed on
Commit
1f643f9
1 Parent(s): d165550

Fix tokenizer.

Browse files
special_tokens_map.json CHANGED
@@ -1,4 +1,23 @@
1
  {
2
- "bos_token": "<|begin_of_text|>",
3
- "eos_token": "<|end_of_text|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_of_text|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
  }
tokenizer.json CHANGED
@@ -2300,7 +2300,7 @@
2300
  },
2301
  {
2302
  "id": 128255,
2303
- "content": "<|reserved_special_token_250|>",
2304
  "single_word": false,
2305
  "lstrip": false,
2306
  "rstrip": false,
@@ -2348,6 +2348,7 @@
2348
  "end_of_word_suffix": null,
2349
  "fuse_unk": false,
2350
  "byte_fallback": false,
 
2351
  "vocab": {
2352
  "!": 0,
2353
  "\"": 1,
@@ -410500,4 +410501,4 @@
410500
  "éĶ ¦"
410501
  ]
410502
  }
410503
- }
 
2300
  },
2301
  {
2302
  "id": 128255,
2303
+ "content": "<pad>",
2304
  "single_word": false,
2305
  "lstrip": false,
2306
  "rstrip": false,
 
2348
  "end_of_word_suffix": null,
2349
  "fuse_unk": false,
2350
  "byte_fallback": false,
2351
+ "ignore_merges": false,
2352
  "vocab": {
2353
  "!": 0,
2354
  "\"": 1,
 
410501
  "éĶ ¦"
410502
  ]
410503
  }
410504
+ }
tokenizer_config.json CHANGED
@@ -2060,4 +2060,4 @@
2060
  "model_max_length": 1000000000000000019884624838656,
2061
  "pad_token": "<pad>",
2062
  "tokenizer_class": "PreTrainedTokenizerFast"
2063
- }
 
2060
  "model_max_length": 1000000000000000019884624838656,
2061
  "pad_token": "<pad>",
2062
  "tokenizer_class": "PreTrainedTokenizerFast"
2063
+ }