cindyangelira
committed on
Commit
•
67ec522
1
Parent(s):
bc0f6df
Add tokenizer
Browse files
- tokenizer.json +2 -2
- tokenizer_config.json +0 -2
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8373f9cd3d27591e1924426bcc1c8799bc5a9affc4fc857982c5d66668dd1f41
|
3 |
+
size 17082832
|
tokenizer_config.json
CHANGED
@@ -44,13 +44,11 @@
|
|
44 |
"bos_token": "<s>",
|
45 |
"clean_up_tokenization_spaces": true,
|
46 |
"cls_token": "<s>",
|
47 |
-
"do_lower_case": false,
|
48 |
"eos_token": "</s>",
|
49 |
"mask_token": "<mask>",
|
50 |
"model_max_length": 512,
|
51 |
"pad_token": "<pad>",
|
52 |
"sep_token": "</s>",
|
53 |
-
"sp_model_kwargs": {},
|
54 |
"tokenizer_class": "XLMRobertaTokenizer",
|
55 |
"unk_token": "<unk>"
|
56 |
}
|
|
|
44 |
"bos_token": "<s>",
|
45 |
"clean_up_tokenization_spaces": true,
|
46 |
"cls_token": "<s>",
|
|
|
47 |
"eos_token": "</s>",
|
48 |
"mask_token": "<mask>",
|
49 |
"model_max_length": 512,
|
50 |
"pad_token": "<pad>",
|
51 |
"sep_token": "</s>",
|
|
|
52 |
"tokenizer_class": "XLMRobertaTokenizer",
|
53 |
"unk_token": "<unk>"
|
54 |
}
|