simonschoe
committed on
Commit
•
056293e
1
Parent(s):
9af577a
add tokenizer
Browse files
- tokenizer.json +4 -2
tokenizer.json
CHANGED
@@ -53,7 +53,8 @@
|
|
53 |
"pre_tokenizer": {
|
54 |
"type": "ByteLevel",
|
55 |
"add_prefix_space": false,
|
56 |
-
"trim_offsets": true
|
|
|
57 |
},
|
58 |
"post_processor": {
|
59 |
"type": "RobertaProcessing",
|
@@ -71,7 +72,8 @@
|
|
71 |
"decoder": {
|
72 |
"type": "ByteLevel",
|
73 |
"add_prefix_space": true,
|
74 |
-
"trim_offsets": true
|
|
|
75 |
},
|
76 |
"model": {
|
77 |
"type": "BPE",
|
|
|
53 |
"pre_tokenizer": {
|
54 |
"type": "ByteLevel",
|
55 |
"add_prefix_space": false,
|
56 |
+
"trim_offsets": true,
|
57 |
+
"use_regex": true
|
58 |
},
|
59 |
"post_processor": {
|
60 |
"type": "RobertaProcessing",
|
|
|
72 |
"decoder": {
|
73 |
"type": "ByteLevel",
|
74 |
"add_prefix_space": true,
|
75 |
+
"trim_offsets": true,
|
76 |
+
"use_regex": true
|
77 |
},
|
78 |
"model": {
|
79 |
"type": "BPE",
|