julicee committed on
Commit
42903ae
1 Parent(s): c6e9571

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 438, "</s>": 439}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"一": 1, "三": 2, "上": 3, "下": 4, "不": 5, "並": 6, "中": 7, "丹": 8, "之": 9, "乖": 10, "九": 11, "也": 12, "亂": 13, "了": 14, "事": 15, "五": 16, "亞": 17, "交": 18, "人": 19, "什": 20, "他": 21, "以": 22, "件": 23, "休": 24, "但": 25, "住": 26, "作": 27, "你": 28, "來": 29, "供": 30, "依": 31, "便": 32, "保": 33, "信": 34, "個": 35, "們": 36, "倒": 37, "假": 38, "做": 39, "備": 40, "傳": 41, "債": 42, "像": 43, "兀": 44, "兇": 45, "光": 46, "免": 47, "兩": 48, "公": 49, "六": 50, "其": 51, "再": 52, "冷": 53, "出": 54, "分": 55, "切": 56, "初": 57, "別": 58, "到": 59, "則": 60, "前": 61, "加": 62, "劣": 63, "動": 64, "北": 65, "十": 66, "千": 67, "午": 68, "危": 69, "原": 70, "去": 71, "又": 72, "及": 73, "友": 74, "口": 75, "可": 76, "台": 77, "吃": 78, "各": 79, "命": 80, "和": 81, "咖": 82, "哈": 83, "哪": 84, "唆": 85, "啊": 86, "問": 87, "啡": 88, "啦": 89, "善": 90, "喊": 91, "喝": 92, "嘍": 93, "噓": 94, "四": 95, "回": 96, "因": 97, "國": 98, "圍": 99, "園": 100, "圖": 101, "在": 102, "地": 103, "場": 104, "外": 105, "多": 106, "夢": 107, "大": 108, "天": 109, "太": 110, "失": 111, "奮": 112, "女": 113, "她": 114, "好": 115, "妙": 116, "妥": 117, "妳": 118, "姊": 119, "始": 120, "媽": 121, "子": 122, "存": 123, "孩": 124, "孫": 125, "安": 126, "完": 127, "定": 128, "宜": 129, "客": 130, "害": 131, "家": 132, "容": 133, "察": 134, "審": 135, "對": 136, "小": 137, "就": 138, "屬": 139, "山": 140, "工": 141, "己": 142, "已": 143, "巷": 144, "布": 145, "帆": 146, "希": 147, "帶": 148, "幣": 149, "幫": 150, "平": 151, "年": 152, "店": 153, "度": 154, "建": 155, "彼": 156, "往": 157, "待": 158, "很": 159, "後": 160, "得": 161, "從": 162, "心": 163, "忙": 164, "快": 165, "念": 166, "怕": 167, "急": 168, "恍": 169, "情": 170, "惡": 171, "想": 172, "惹": 173, "意": 174, "態": 175, "憩": 176, "應": 177, "成": 178, "我": 179, "戴": 180, "戶": 181, "房": 182, "所": 183, "才": 184, "找": 185, "折": 186, "抱": 187, "拍": 188, "持": 189, "掛": 190, "接": 191, "提": 192, "搬": 193, "擔": 194, "救": 195, "教": 196, "散": 197, "文": 198, "新": 199, "斷": 200, "於": 201, "日": 202, "明": 203, "易": 204, "星": 205, "昨": 206, "是": 207, "時": 208, "晚": 209, "晶": 210, "更": 
211, "曾": 212, "最": 213, "會": 214, "有": 215, "朋": 216, "望": 217, "本": 218, "杖": 219, "杯": 220, "東": 221, "果": 222, "柔": 223, "查": 224, "柺": 225, "桃": 226, "桌": 227, "業": 228, "樂": 229, "樣": 230, "機": 231, "欠": 232, "次": 233, "歐": 234, "正": 235, "此": 236, "步": 237, "殺": 238, "每": 239, "比": 240, "氣": 241, "決": 242, "沒": 243, "治": 244, "況": 245, "法": 246, "波": 247, "流": 248, "淚": 249, "準": 250, "溫": 251, "滑": 252, "滿": 253, "漠": 254, "澳": 255, "災": 256, "為": 257, "無": 258, "照": 259, "煮": 260, "燥": 261, "爆": 262, "爬": 263, "牆": 264, "牡": 265, "物": 266, "特": 267, "狀": 268, "狠": 269, "獵": 270, "玩": 271, "班": 272, "現": 273, "理": 274, "瑩": 275, "甜": 276, "生": 277, "用": 278, "由": 279, "申": 280, "異": 281, "當": 282, "療": 283, "發": 284, "百": 285, "的": 286, "盤": 287, "目": 288, "直": 289, "相": 290, "眾": 291, "睡": 292, "瞧": 293, "知": 294, "研": 295, "神": 296, "福": 297, "禮": 298, "程": 299, "種": 300, "究": 301, "突": 302, "立": 303, "竹": 304, "第": 305, "等": 306, "管": 307, "築": 308, "約": 309, "組": 310, "給": 311, "統": 312, "經": 313, "緒": 314, "總": 315, "織": 316, "續": 317, "罩": 318, "群": 319, "老": 320, "而": 321, "聊": 322, "聖": 323, "聲": 324, "聽": 325, "肉": 326, "背": 327, "能": 328, "脫": 329, "自": 330, "與": 331, "興": 332, "色": 333, "菜": 334, "萬": 335, "著": 336, "蘋": 337, "蘭": 338, "處": 339, "號": 340, "蝦": 341, "行": 342, "被": 343, "裡": 344, "製": 345, "褲": 346, "西": 347, "要": 348, "規": 349, "親": 350, "覺": 351, "觀": 352, "角": 353, "解": 354, "言": 355, "討": 356, "記": 357, "許": 358, "話": 359, "認": 360, "說": 361, "請": 362, "證": 363, "貌": 364, "貴": 365, "買": 366, "費": 367, "貿": 368, "賭": 369, "賺": 370, "起": 371, "超": 372, "趕": 373, "跌": 374, "跑": 375, "路": 376, "車": 377, "較": 378, "輸": 379, "轉": 380, "辦": 381, "近": 382, "這": 383, "通": 384, "造": 385, "連": 386, "進": 387, "遇": 388, "遊": 389, "運": 390, "過": 391, "道": 392, "還": 393, "邊": 394, "那": 395, "都": 396, "鄉": 397, "醒": 398, "重": 399, "針": 400, "鉤": 401, "銀": 402, "錄": 403, "錢": 404, "錯": 405, "鍊": 406, "鍋": 407, "鎮": 408, "鐘": 409, "長": 410, "門": 
411, "開": 412, "閒": 413, "間": 414, "關": 415, "院": 416, "際": 417, "險": 418, "隱": 419, "雙": 420, "電": 421, "靠": 422, "面": 423, "革": 424, "頂": 425, "項": 426, "頭": 427, "題": 428, "餐": 429, "高": 430, "魯": 431, "鵝": 432, "鹹": 433, "麼": 434, "點": 435, "|": 0, "[UNK]": 436, "[PAD]": 437}