saeedmaroof committed on
Commit
c9ef27b
1 Parent(s): f028548

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ﭘ": 0, "ٔ": 1, "ر": 2, "ز": 3, "m": 4, "ص": 5, "a": 6, "ے": 7, ")": 8, "ـ": 9, "(": 10, "َ": 11, "ﺑ": 12, "ی": 13, "ء": 14, "ﻀ": 15, "ژ": 16, "ت": 17, "ف": 18, "ﻥ": 19, "پ": 20, "و": 21, "f": 22, "ﺪ": 23, "&": 24, "c": 25, "ﺷ": 26, "ﺭ": 27, "ئ": 28, "ً": 29, "،": 30, "e": 31, "ۀ": 32, "ن": 33, "ﺘ": 34, "د": 35, "گ": 36, "ک": 37, "ة": 38, "ﻌ": 39, "ﻟ": 40, "ض": 41, "ش": 42, "z": 43, "u": 44, "ﺴ": 45, "k": 46, "ِ": 47, "ع": 48, "ْ": 49, "ب": 50, "ﺒ": 51, "»": 52, "y": 53, "؛": 54, "ﺩ": 55, "n": 57, "i": 58, "#": 59, "ﺖ": 60, "ﺸ": 61, "š": 62, "…": 63, "ل": 64, "ٌ": 65, "_": 66, "ذ": 67, "ﻋ": 68, "–": 69, "ﯽ": 70, "ج": 71, "ّ": 72, "p": 73, "غ": 74, "ط": 75, "أ": 76, "b": 77, "ﺱ": 78, "ى": 79, "ي": 80, "م": 81, "ُ": 82, "آ": 83, "ؤ": 84, "ﻤ": 85, "ا": 86, "؟": 87, "٬": 88, "r": 89, "س": 90, "ق": 91, "ﺎ": 92, "ﺮ": 93, "ﻢ": 94, "ﻭ": 95, "چ": 96, "خ": 97, "ﯿ": 98, "ك": 99, "t": 100, "«": 101, "ﺍ": 102, "ث": 103, "g": 104, "o": 105, "ﺧ": 106, "h": 107, "ﯾ": 108, "ﻡ": 109, "ﮐ": 110, "d": 111, "ه": 112, "ā": 113, "ﻪ": 114, "s": 115, "ﮔ": 116, "ﻮ": 117, "ظ": 118, "ح": 119, "ﻧ": 120, "q": 121, "|": 56, "[UNK]": 122, "[PAD]": 123}