Upload tokenizer
Browse files
- tokenizer.json +10 -0
- tokenizer_config.json +1 -1
tokenizer.json
CHANGED
@@ -6946,6 +6946,10 @@
|
|
6946 |
"normalizer": {
|
6947 |
"type": "Sequence",
|
6948 |
"normalizers": [
|
|
|
|
|
|
|
|
|
6949 |
{
|
6950 |
"type": "Replace",
|
6951 |
"pattern": {
|
@@ -7025,6 +7029,12 @@
|
|
7025 |
},
|
7026 |
{
|
7027 |
"type": "Fuse"
|
|
|
|
|
|
|
|
|
|
|
|
|
7028 |
}
|
7029 |
]
|
7030 |
},
|
|
|
6946 |
"normalizer": {
|
6947 |
"type": "Sequence",
|
6948 |
"normalizers": [
|
6949 |
+
{
|
6950 |
+
"type": "Prepend",
|
6951 |
+
"prepend": "▁"
|
6952 |
+
},
|
6953 |
{
|
6954 |
"type": "Replace",
|
6955 |
"pattern": {
|
|
|
7029 |
},
|
7030 |
{
|
7031 |
"type": "Fuse"
|
7032 |
+
},
|
7033 |
+
{
|
7034 |
+
"type": "Strip",
|
7035 |
+
"content": " ",
|
7036 |
+
"start": 1,
|
7037 |
+
"stop": 0
|
7038 |
}
|
7039 |
]
|
7040 |
},
|
tokenizer_config.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"add_bos_token": true,
|
3 |
"add_eos_token": false,
|
4 |
-
"add_prefix_space":
|
5 |
"added_tokens_decoder": {
|
6 |
"0": {
|
7 |
"content": "<unk>",
|
|
|
1 |
{
|
2 |
"add_bos_token": true,
|
3 |
"add_eos_token": false,
|
4 |
+
"add_prefix_space": true,
|
5 |
"added_tokens_decoder": {
|
6 |
"0": {
|
7 |
"content": "<unk>",
|