SaulLu committed on
Commit
50a007b
1 Parent(s): 67b371b

add modified albert tokenizer

Browse files
Files changed (1) hide show
  1. README.md +1 -2
README.md CHANGED
@@ -11,6 +11,5 @@ print(tokenizer.convert_ids_to_tokens(tokenizer.encode(text)))
11
  # ['[CLS]', '▁this', '▁is', '▁a', '▁text', '▁with', '▁accent', 's', '▁and', '▁capital', '▁letters', '[SEP]']
12
  tokenizer = AutoTokenizer.from_pretrained("huggingface-course/albert-tokenizer-without-normalizer")
13
  print(tokenizer.convert_ids_to_tokens(tokenizer.encode(text)))
14
- #
15
- ['[CLS]', '▁', '<unk>', 'his', '▁is', '▁a', '▁text', '▁with', '▁', '<unk>', 'cc', '<unk>', 'nts', '▁and', '▁', '<unk>', '▁', '<unk>', '[SEP]']
16
  ```
 
11
  # ['[CLS]', '▁this', '▁is', '▁a', '▁text', '▁with', '▁accent', 's', '▁and', '▁capital', '▁letters', '[SEP]']
12
  tokenizer = AutoTokenizer.from_pretrained("huggingface-course/albert-tokenizer-without-normalizer")
13
  print(tokenizer.convert_ids_to_tokens(tokenizer.encode(text)))
14
+ # ['[CLS]', '▁', '<unk>', 'his', '▁is', '▁a', '▁text', '▁with', '▁', '<unk>', 'cc', '<unk>', 'nts', '▁and', '▁', '<unk>', '▁', '<unk>', '[SEP]']
 
15
  ```