SaulLu committed on
Commit
2ba79ba
1 Parent(s): b4310d5

add tokenizer

Browse files
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
tokenizer.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [],
6
+ "normalizer": null,
7
+ "pre_tokenizer": {
8
+ "type": "ByteLevel",
9
+ "add_prefix_space": false,
10
+ "trim_offsets": true
11
+ },
12
+ "post_processor": {
13
+ "type": "ByteLevel",
14
+ "add_prefix_space": true,
15
+ "trim_offsets": false
16
+ },
17
+ "decoder": {
18
+ "type": "ByteLevel",
19
+ "add_prefix_space": true,
20
+ "trim_offsets": true
21
+ },
22
+ "model": {
23
+ "type": "BPE",
24
+ "dropout": null,
25
+ "unk_token": null,
26
+ "continuing_subword_prefix": null,
27
+ "end_of_word_suffix": null,
28
+ "fuse_unk": false,
29
+ "vocab": {},
30
+ "merges": []
31
+ }
32
+ }
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tokenizer_class": "PreTrainedTokenizerFast"}