finiteautomata committed on
Commit
10fdf78
1 Parent(s): 1af02e8

Improve tokenization

Browse files
added_tokens.json CHANGED
@@ -1 +1 @@
1
- {"[USER]": 31002, "[HASHTAG]": 31003, "[EMOJI]": 31004}
 
1
+ {"hashtag": 31004, "emoji": 31005, "@usuario": 31002, "url": 31003}
config.json CHANGED
@@ -39,5 +39,5 @@
39
  "transformers_version": "4.6.1",
40
  "type_vocab_size": 2,
41
  "use_cache": true,
42
- "vocab_size": 31005
43
  }
 
39
  "transformers_version": "4.6.1",
40
  "type_vocab_size": 2,
41
  "use_cache": true,
42
+ "vocab_size": 31006
43
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d202594b4bc33496adba0cb8bd75b97421749b6fc787b570f596002cf08b760e
3
- size 439522012
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8522d856bef1ac40efc2ba7dc6be41cf196e35bef5b6b6a7ca7beeb120e6d14d
3
+ size 439524626
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "additional_special_tokens": ["[USER]", "[HASHTAG]", "[EMOJI]"]}
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff