finiteautomata
committed on
Commit
•
10fdf78
1
Parent(s):
1af02e8
Improve tokenization
Browse files
- added_tokens.json +1 -1
- config.json +1 -1
- pytorch_model.bin +2 -2
- special_tokens_map.json +1 -1
- tokenizer.json +0 -0
added_tokens.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"hashtag": 31004, "emoji": 31005, "@usuario": 31002, "url": 31003}
|
config.json
CHANGED
@@ -39,5 +39,5 @@
|
|
39 |
"transformers_version": "4.6.1",
|
40 |
"type_vocab_size": 2,
|
41 |
"use_cache": true,
|
42 |
-
"vocab_size":
|
43 |
}
|
|
|
39 |
"transformers_version": "4.6.1",
|
40 |
"type_vocab_size": 2,
|
41 |
"use_cache": true,
|
42 |
+
"vocab_size": 31006
|
43 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8522d856bef1ac40efc2ba7dc6be41cf196e35bef5b6b6a7ca7beeb120e6d14d
|
3 |
+
size 439524626
|
special_tokens_map.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|