add tokenizer
Browse files
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"ś": 0, "è": 1, "ụ": 2, "t": 3, "g": 4, "ĺ": 5, "i": 6, "ọ": 8, "—": 9, "ẃ": 10, "]": 11, "̣": 12, "m": 13, "w": 14, "k": 15, "ṣ": 16, "j": 17, "ṛ": 18, "c": 19, "h": 20, "́": 21, "ń": 22, "ù": 23, "̀": 24, "ì": 25, "l": 26, "b": 27, "ú": 28, "ạ": 29, "ò": 30, "n": 31, "ṕ": 32, "e": 33, "x": 34, "p": 35, "y": 36, "í": 37, "é": 38, "ó": 39, "à": 40, "f": 41, "ǹ": 42, "[": 43, "u": 44, "v": 45, "á": 46, "’": 47, "ẹ": 48, "r": 49, "'": 50, "a": 51, "d": 52, "ḷ": 53, "o": 54, "s": 55, "|": 7, "[UNK]": 56, "[PAD]": 57}
|