add tokenizer
Browse files- added_tokens.json +1 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<s>": 95, "</s>": 96}
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"ऑ": 0, "ग": 1, "ल": 2, "n": 3, "म": 4, "b": 5, "ढ़": 6, "ॉ": 7, "i": 8, "ऋ": 9, "भ": 10, "े": 11, "ई": 12, "फ": 13, "z": 14, "य": 15, "ं": 16, "ः": 17, "ड": 18, "द": 19, "ा": 20, "थ": 21, "ृ": 22, "व": 23, "v": 24, "्": 25, "ॅ": 26, "ऐ": 27, "ष": 28, "ब": 29, "r": 30, "d": 31, "अ": 32, "आ": 33, "ज़": 34, "ञ": 35, "m": 36, "t": 37, "श": 38, "g": 39, "k": 40, "ठ": 41, "y": 42, "|": 57, "ण": 44, "ध": 45, "।": 46, "ि": 47, "ी": 48, "ढ": 49, "ू": 50, "a": 51, "h": 52, "ँ": 53, "j": 54, "f": 55, "झ": 56, "l": 58, "ह": 59, "e": 60, "s": 61, "घ": 62, "ु": 63, "ज": 64, "w": 65, "ट": 66, "ो": 67, "स": 68, "छ": 69, "ऊ": 70, "न": 71, "प": 72, "ौ": 73, "ओ": 74, "u": 75, "इ": 76, "उ": 77, "o": 78, "ए": 79, "p": 80, "ड़": 81, "औ": 82, "ै": 83, "क़": 84, "x": 85, "ग़": 86, "र": 87, "ख": 88, "c": 89, "क": 90, "त": 91, "़": 92, "च": 93, "[UNK]": 43, "[PAD]": 94}
|