add modified albert tokenizer
README.md CHANGED
@@ -11,6 +11,5 @@ print(tokenizer.convert_ids_to_tokens(tokenizer.encode(text)))
 # ['[CLS]', '▁this', '▁is', '▁a', '▁text', '▁with', '▁accent', 's', '▁and', '▁capital', '▁letters', '[SEP]']
 tokenizer = AutoTokenizer.from_pretrained("huggingface-course/albert-tokenizer-without-normalizer")
 print(tokenizer.convert_ids_to_tokens(tokenizer.encode(text)))
-#
-['[CLS]', '▁', '<unk>', 'his', '▁is', '▁a', '▁text', '▁with', '▁', '<unk>', 'cc', '<unk>', 'nts', '▁and', '▁', '<unk>', '▁', '<unk>', '[SEP]']
+# ['[CLS]', '▁', '<unk>', 'his', '▁is', '▁a', '▁text', '▁with', '▁', '<unk>', 'cc', '<unk>', 'nts', '▁and', '▁', '<unk>', '▁', '<unk>', '[SEP]']
 ```
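For context, the hunk only covers the tail of the README's code block. Below is a minimal runnable sketch of what the full snippet presumably looks like, assuming the lines above the hunk import `AutoTokenizer`, define `text`, and first load a stock ALBERT checkpoint; the exact `text` value and the `albert-base-v2` baseline are assumptions chosen to match the printed outputs, not something shown in this diff.

```python
# Hypothetical reconstruction of the README snippet (not taken verbatim from the diff).
from transformers import AutoTokenizer

# Assumed input string: it must contain accents and capital letters to produce
# the two outputs shown in the README.
text = "This is a text with àccénts and CAPITAL LETTERS"

# Stock ALBERT tokenizer (assumed checkpoint): its normalizer lowercases the
# text and strips accents before the SentencePiece model runs.
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
print(tokenizer.convert_ids_to_tokens(tokenizer.encode(text)))
# ['[CLS]', '▁this', '▁is', '▁a', '▁text', '▁with', '▁accent', 's', '▁and', '▁capital', '▁letters', '[SEP]']

# Same tokenizer with the normalizer removed: accented and upper-case
# characters never reach the vocabulary in a known form, so they map to <unk>.
tokenizer = AutoTokenizer.from_pretrained("huggingface-course/albert-tokenizer-without-normalizer")
print(tokenizer.convert_ids_to_tokens(tokenizer.encode(text)))
# ['[CLS]', '▁', '<unk>', 'his', '▁is', '▁a', '▁text', '▁with', '▁', '<unk>', 'cc', '<unk>', 'nts', '▁and', '▁', '<unk>', '▁', '<unk>', '[SEP]']
```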