{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "BertNormalizer", "clean_text": true, "handle_chinese_chars": true, "strip_accents": null, "lowercase": true }, "pre_tokenizer": { "type": "BertPreTokenizer" }, "post_processor": null, "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true }, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "%": 5, "+": 6, "0": 7, "1": 8, "2": 9, "3": 10, "4": 11, "5": 12, "6": 13, "7": 14, "8": 15, "9": 16, "<": 17, "=": 18, ">": 19, "?": 20, "[": 21, "]": 22, "a": 23, "b": 24, "c": 25, "d": 26, "e": 27, "f": 28, "g": 29, "h": 30, "i": 31, "j": 32, "k": 33, "l": 34, "m": 35, "n": 36, "o": 37, "p": 38, "q": 39, "r": 40, "s": 41, "t": 42, "u": 43, "v": 44, "w": 45, "x": 46, "y": 47, "z": 48, "·": 49, "¿": 50, "æ": 51, "ø": 52, "þ": 53, "đ": 54, "ı": 55, "ł": 56, "ɨ": 57, "ɹ": 58, "ʁ": 59, "ʌ": 60, "ʿ": 61, "α": 62, "β": 63, "γ": 64, "δ": 65, "ε": 66, "ζ": 67, "η": 68, "θ": 69, "ι": 70, "κ": 71, "λ": 72, "μ": 73, "ν": 74, "ξ": 75, "ο": 76, "π": 77, "ρ": 78, "σ": 79, "τ": 80, "υ": 81, "φ": 82, "χ": 83, "ψ": 84, "ω": 85, "а": 86, "б": 87, "в": 88, "г": 89, "д": 90, "е": 91, "ж": 92, "з": 93, "и": 94, "к": 95, "л": 96, "м": 97, "н": 98, "о": 99, "п": 100, "р": 101, "с": 102, "т": 103, "у": 104, "ф": 105, "х": 106, "ц": 107, "ч": 108, "ш": 109, "э": 110, "ю": 111, "я": 112, "і": 113, "ј": 114, "ա": 115, "գ": 116, "յ": 117, "א": 118, "ט": 119, "ם": 120, "ש": 121, "ا": 122, "ث": 123, "د": 124, "س": 125, "ش": 126, "ي": 127, "ण": 128, "प": 129, "ा": 130, "া": 131, "ய": 132, "ප": 133, "ර": 134, "ව": 135, "ศ": 136, "ນ": 137, "ເ": 138, "་": 139, "བ": 140, "გ": 141, "თ": 142, "ქ": 143, "წ": 144, "ᄀ": 145, "ᄂ": 146, "ᄃ": 147, "ᄅ": 148, "ᄆ": 149, "ᄇ": 150, "ᄉ": 151, "ᄋ": 152, "ᄌ": 153, "ᄎ": 154, "ᄏ": 155, "ᄐ": 156, "ᄑ": 157, "ᄒ": 158, "ᅡ": 159, "ᅢ": 160, "ᅣ": 161, "ᅥ": 162, "ᅦ": 163, "ᅧ": 164, "ᅩ": 165, "ᅪ": 166, "ᅭ": 167, "ᅮ": 168, "ᅯ": 169, "ᅱ": 170, "ᅲ": 171, "ᅳ": 172, "ᅴ": 173, "ᅵ": 174, "ᆨ": 175, "ᆫ": 176, "ᆯ": 177, "ᆷ": 178, "ᆸ": 179, "ᆼ": 180, "ង": 181, "ᠠ": 182, "ᠢ": 183, "‐": 184, "„": 185, "‡": 186, "‧": 187, "↑": 188, "∂": 189, "∞": 190, "≦": 191, "≪": 192, "⊂": 193, "⋅": 194, "⋯": 195, "┏": 196, "◆": 197, "◇": 198, "♀": 199, "♭": 200, "。": 201, "〕": 202, "ぇ": 203, "か": 204, "き": 205, "く": 206, "け": 207, "こ": 208, "さ": 209, "し": 210, "す": 211, "せ": 212, "そ": 213, "た": 214, "ち": 215, "つ": 216, "て": 217, "と": 218, "は": 219, "ひ": 220, "ふ": 221, "へ": 222, "ほ": 223, "や": 224, "ゅ": 225, "ゝ": 226, "ウ": 227, "ェ": 228, "カ": 229, "キ": 230, "ク": 231, "ケ": 232, "コ": 233, "サ": 234, "シ": 235, "ス": 236, "セ": 237, "ソ": 238, "タ": 239, "チ": 240, "ツ": 241, "テ": 242, "ト": 243, "ハ": 244, "ヒ": 245, "フ": 246, "ヘ": 247, "ホ": 248, "メ": 249, "ラ": 250, "ヱ": 251, "ヵ": 252, "・": 253, "万": 254, "丘": 255, "丹": 256, "主": 257, "久": 258, "乐": 259, "乖": 260, "了": 261, "亢": 262, "享": 263, "亮": 264, "今": 265, "仮": 266, "件": 267, "伐": 268, "住": 269, "佑": 270, "佟": 271, "佳": 272, "侮": 273, "侯": 274, "侵": 275, "俘": 276, "俟": 277, "俣": 278, "俸": 279, "個": 280, "倍": 281, "候": 282, "倩": 283, "倶": 284, "偕": 285, "偵": 286, "傍": 287, "傑": 288, "傳": 289, "債": 290, "僅": 291, "儂": 292, "償": 293, "兌": 294, "兪": 295, "共": 296, "内": 297, "写": 298, "冠": 299, "冲": 300, "冷": 301, "准": 302, "出": 303, "刁": 304, "到": 305, "刻": 306, "剛": 307, "剽": 308, "劔": 309, "勃": 310, "勉": 311, "勤": 312, "匕": 313, "區": 314, "協": 315, "博": 316, "卡": 317, "卦": 318, "厄": 319, "厙": 320, "原": 321, "厠": 322, "又": 323, "双": 324, "受": 325, "叛": 326, "只": 327, "合": 328, "吋": 329, "含": 330, "吹": 331, "呑": 332, "呟": 333, "和": 334, "咥": 335, "咲": 336, "品": 337, "哈": 338, "哲": 339, "唯": 340, "唱": 341, "啄": 342, "喉": 343, "喋": 344, "喩": 345, "喪": 346, "喫": 347, "嘲": 348, "嘴": 349, "噌": 350, "噛": 351, "嚢": 352, "嚴": 353, "囂": 354, "因": 355, "囿": 356, "圏": 357, "圖": 358, "圳": 359, "圻": 360, "坊": 361, "坑": 362, "垠": 363, "垣": 364, "埃": 365, "城": 366, "堕": 367, "塩": 368, "墓": 369, "壌": 370, "壕": 371, "壮": 372, "大": 373, "奉": 374, "奔": 375, "奮": 376, "奸": 377, "妍": 378, "妓": 379, "姓": 380, "姶": 381, "娟": 382, "婦": 383, "媚": 384, "嫉": 385, "嫡": 386, "嬪": 387, "存": 388, "孟": 389, "学": 390, "孫": 391, "安": 392, "宜": 393, "宥": 394, "宸": 395, "容": 396, "寛": 397, "对": 398, "寺": 399, "寿": 400, "對": 401, "展": 402, "岔": 403, "峯": 404, "崋": 405, "崑": 406, "崔": 407, "崙": 408, "嵊": 409, "嵯": 410, "嶝": 411, "嶼": 412, "嶽": 413, "川": 414, "巣": 415, "帰": 416, "幅": 417, "平": 418, "幹": 419, "庄": 420, "度": 421, "廣": 422, "廩": 423, "延": 424, "弈": 425, "弉": 426, "张": 427, "弦": 428, "彅": 429, "彌": 430, "形": 431, "彤": 432, "徨": 433, "循": 434, "徽": 435, "忘": 436, "忻": 437, "怠": 438, "恐": 439, "恬": 440, "恵": 441, "悉": 442, "悝": 443, "惚": 444, "惟": 445, "惣": 446, "愛": 447, "慇": 448, "慈": 449, "態": 450, "憎": 451, "憑": 452, "憔": 453, "憤": 454, "憺": 455, "懇": 456, "懋": 457, "懲": 458, "懼": 459, "戊": 460, "戎": 461, "我": 462, "戛": 463, "截": 464, "戯": 465, "才": 466, "扎": 467, "抵": 468, "拘": 469, "招": 470, "拠": 471, "按": 472, "挟": 473, "振": 474, "捏": 475, "捗": 476, "捜": 477, "捻": 478, "掘": 479, "掣": 480, "掲": 481, "掾": 482, "揃": 483, "揖": 484, "援": 485, "揺": 486, "搭": 487, "摩": 488, "摸": 489, "攘": 490, "攪": 491, "救": 492, "整": 493, "斉": 494, "斑": 495, "斗": 496, "斬": 497, "斯": 498, "旁": 499, "旋": 500, "族": 501, "昆": 502, "易": 503, "昕": 504, "星": 505, "春": 506, "晁": 507, "晰": 508, "晴": 509, "暇": 510, "暗": 511, "暦": 512, "暲": 513, "暴": 514, "曙": 515, "曜": 516, "曝": 517, "朋": 518, "朗": 519, "朦": 520, "朧": 521, "朶": 522, "机": 523, "杞": 524, "杰": 525, "杲": 526, "枚": 527, "柘": 528, "柳": 529, "栓": 530, "栖": 531, "栩": 532, "桧": 533, "桿": 534, "梗": 535, "梵": 536, "梶": 537, "棒": 538, "棲": 539, "植": 540, "楕": 541, "楮": 542, "極": 543, "槌": 544, "樂": 545, "樊": 546, "樒": 547, "樟": 548, "横": 549, "樫": 550, "樵": 551, "檀": 552, "檬": 553, "檮": 554, "檸": 555, "櫛": 556, "欲": 557, "欽": 558, "歆": 559, "歳": 560, "歸": 561, "残": 562, "殲": 563, "殴": 564, "氏": 565, "汐": 566, "汝": 567, "池": 568, "沃": 569, "没": 570, "沱": 571, "河": 572, "油": 573, "沾": 574, "況": 575, "泊": 576, "泗": 577, "泣": 578, "注": 579, "洋": 580, "洒": 581, "津": 582, "洪": 583, "浄": 584, "浦": 585, "海": 586, "涙": 587, "涿": 588, "淀": 589, "淳": 590, "渉": 591, "渕": 592, "渝": 593, "測": 594, "渭": 595, "湍": 596, "溯": 597, "滄": 598, "滇": 599, "滎": 600, "滕": 601, "滷": 602, "滾": 603, "漉": 604, "漫": 605, "漬": 606, "漸": 607, "漿": 608, "潜": 609, "潴": 610, "潼": 611, "澎": 612, "澪": 613, "濤": 614, "濫": 615, "濵": 616, "瀛": 617, "瀞": 618, "瀾": 619, "灼": 620, "炸": 621, "烽": 622, "焙": 623, "熈": 624, "熕": 625, "熹": 626, "燃": 627, "燎": 628, "營": 629, "燵": 630, "爻": 631, "犀": 632, "犁": 633, "状": 634, "狂": 635, "狩": 636, "狭": 637, "狼": 638, "猫": 639, "玖": 640, "琥": 641, "琰": 642, "琶": 643, "瑜": 644, "瑟": 645, "瑣": 646, "瑶": 647, "璧": 648, "瓌": 649, "瓔": 650, "瓠": 651, "甌": 652, "甑": 653, "甕": 654, "甫": 655, "甲": 656, "畚": 657, "畝": 658, "畠": 659, "畢": 660, "畦": 661, "當": 662, "疇": 663, "疎": 664, "疑": 665, "疣": 666, "疫": 667, "疱": 668, "痘": 669, "痢": 670, "痰": 671, "痴": 672, "瘍": 673, "白": 674, "皇": 675, "皐": 676, "皝": 677, "盒": 678, "目": 679, "相": 680, "盾": 681, "省": 682, "眷": 683, "眸": 684, "睡": 685, "睿": 686, "矢": 687, "砂": 688, "砺": 689, "磁": 690, "磧": 691, "磯": 692, "礁": 693, "礎": 694, "礪": 695, "社": 696, "祓": 697, "禍": 698, "禎": 699, "禧": 700, "科": 701, "秒": 702, "秣": 703, "秩": 704, "程": 705, "稍": 706, "稙": 707, "稻": 708, "穂": 709, "穢": 710, "突": 711, "窒": 712, "窠": 713, "窯": 714, "竈": 715, "童": 716, "笙": 717, "笥": 718, "笹": 719, "筒": 720, "筰": 721, "筵": 722, "箋": 723, "箍": 724, "箒": 725, "節": 726, "篩": 727, "籌": 728, "籾": 729, "粒": 730, "粕": 731, "粘": 732, "粥": 733, "糧": 734, "紊": 735, "紘": 736, "索": 737, "紳": 738, "絡": 739, "絢": 740, "絨": 741, "絶": 742, "綵": 743, "線": 744, "緞": 745, "縉": 746, "繚": 747, "级": 748, "罫": 749, "置": 750, "美": 751, "羲": 752, "翟": 753, "翫": 754, "翳": 755, "耿": 756, "聊": 757, "聟": 758, "聲": 759, "聳": 760, "聾": 761, "肱": 762, "肺": 763, "胚": 764, "胤": 765, "胴": 766, "能": 767, "脅": 768, "脈": 769, "脱": 770, "脹": 771, "腐": 772, "腑": 773, "膨": 774, "臻": 775, "臼": 776, "舁": 777, "舌": 778, "舛": 779, "舶": 780, "艙": 781, "艮": 782, "芋": 783, "芍": 784, "芝": 785, "芥": 786, "苔": 787, "苗": 788, "苴": 789, "茄": 790, "茫": 791, "茸": 792, "荏": 793, "荘": 794, "莇": 795, "菊": 796, "萇": 797, "萍": 798, "落": 799, "葆": 800, "葛": 801, "葦": 802, "蒯": 803, "蒴": 804, "蔓": 805, "蔣": 806, "蔵": 807, "蕪": 808, "薇": 809, "薙": 810, "藻": 811, "蘄": 812, "蘊": 813, "蘋": 814, "蘭": 815, "蘿": 816, "虔": 817, "蛛": 818, "蛟": 819, "蜘": 820, "蜴": 821, "蜷": 822, "蝙": 823, "蝶": 824, "蝸": 825, "螂": 826, "螺": 827, "蟇": 828, "衆": 829, "行": 830, "衞": 831, "衢": 832, "衫": 833, "衾": 834, "袢": 835, "袿": 836, "要": 837, "覈": 838, "視": 839, "覗": 840, "訂": 841, "訃": 842, "訛": 843, "訪": 844, "詛": 845, "詭": 846, "詰": 847, "詵": 848, "詹": 849, "誨": 850, "請": 851, "論": 852, "諮": 853, "諱": 854, "諶": 855, "謔": 856, "謡": 857, "謨": 858, "譙": 859, "讐": 860, "豆": 861, "豊": 862, "豬": 863, "豳": 864, "豹": 865, "貘": 866, "貪": 867, "貴": 868, "貸": 869, "賑": 870, "賛": 871, "賠": 872, "賤": 873, "贋": 874, "贍": 875, "赛": 876, "赤": 877, "趙": 878, "跛": 879, "跳": 880, "踊": 881, "踞": 882, "踪": 883, "蹊": 884, "蹋": 885, "躁": 886, "躍": 887, "軀": 888, "軻": 889, "輓": 890, "輔": 891, "輯": 892, "輳": 893, "轡": 894, "迎": 895, "近": 896, "述": 897, "迴": 898, "逃": 899, "逍": 900, "逞": 901, "連": 902, "逼": 903, "適": 904, "遮": 905, "遷": 906, "遽": 907, "避": 908, "邀": 909, "邏": 910, "郊": 911, "郯": 912, "酌": 913, "酎": 914, "酛": 915, "酢": 916, "酸": 917, "醇": 918, "醒": 919, "釋": 920, "量": 921, "釐": 922, "金": 923, "釜": 924, "針": 925, "鈍": 926, "鈴": 927, "鉈": 928, "鉛": 929, "鉞": 930, "銃": 931, "銕": 932, "銚": 933, "鋪": 934, "錆": 935, "錠": 936, "鍮": 937, "鎬": 938, "鐔": 939, "鐸": 940, "鑚": 941, "鑢": 942, "開": 943, "間": 944, "閔": 945, "閨": 946, "闘": 947, "陋": 948, "陟": 949, "院": 950, "陳": 951, "陶": 952, "隘": 953, "際": 954, "隠": 955, "雁": 956, "集": 957, "雌": 958, "離": 959, "難": 960, "需": 961, "霍": 962, "霸": 963, "非": 964, "靭": 965, "靳": 966, "鞘": 967, "鞬": 968, "鞭": 969, "頗": 970, "頸": 971, "顆": 972, "顒": 973, "顥": 974, "顧": 975, "駅": 976, "駈": 977, "驍": 978, "驕": 979, "驛": 980, "驟": 981, "驢": 982, "髄": 983, "髢": 984, "魂": 985, "魃": 986, "魍": 987, "鯨": 988, "鰐": 989, "鳳": 990, "鴈": 991, "鵄": 992, "鵠": 993, "鵲": 994, "鶚": 995, "鷺": 996, "鸕": 997, "鸚": 998, "鸞": 999, "麦": 1000, "鼠": 1001, "鼾": 1002, "齟": 1003, "齬": 1004, "##ᅥ": 1005, "##ᆼ": 1006, "##ᅱ": 1007, "##ᆫ": 1008, "##ᅢ": 1009, "##ᅮ": 1010, "##ᆨ": 1011, "##ᅡ": 1012, "##ᅩ": 1013, "##ᅵ": 1014, "##ᅧ": 1015, "##ᅴ": 1016, "##ᅳ": 1017, "##ᆯ": 1018, "##ᅪ": 1019, "##ᅦ": 1020, "##ᅭ": 1021, "##ᆷ": 1022, "##ᅲ": 1023, "##ᆸ": 1024, "##n": 1025, "##u": 1026, "##s": 1027, "##e": 1028, "##d": 1029, "##1": 1030, "##l": 1031, "##a": 1032, "##k": 1033, "##6": 1034, "##5": 1035, "##3": 1036, "##ᅯ": 1037, "##8": 1038, "##ᅣ": 1039, "##9": 1040, "##7": 1041, "##0": 1042, "##4": 1043, "##p": 1044, "##2": 1045, "un": 1046, "##us": 1047, "##ed": 1048, "unus": 1049, "unused": 1050, "##ᅡᆫ": 1051, "##ᅡᆼ": 1052, "##ᅩᆼ": 1053, "##ᅮᆫ": 1054, "##ᅥᆫ": 1055, "##ᅧᆼ": 1056, "으": 1057, "하": 1058, "##ᅵᆫ": 1059, "##ᅧᆫ": 1060, "기": 1061, "그": 1062, "과": 1063, "나": 1064, "서": 1065, "스": 1066, "야": 1067, "화": 1068, "##ᅧᆨ": 1069, "거": 1070, "구": 1071, "고": 1072, "도": 1073, "드": 1074, "리": 1075, "무": 1076, "마": 1077, "버": 1078, "부": 1079, "바": 1080, "사": 1081, "시": 1082, "우": 1083, "오": 1084, "이": 1085, "와": 1086, "요": 1087, "유": 1088, "저": 1089, "주": 1090, "지": 1091, "해": 1092, "##ᅯᆫ": 1093 } } }