bert-large-cantonese-sts / added_tokens.json
indiejoseph's picture
Training in progress, step 100
92be39c verified
raw
history blame
8 kB
{
"㔷": 21620,
"㖭": 21330,
"㚻": 21255,
"㞗": 21216,
"㞘": 21384,
"㦸": 21493,
"㨂": 21307,
"㩒": 21182,
"㴓": 21485,
"㷫": 21265,
"乸": 21143,
"仼": 21501,
"佮": 21234,
"侘": 21537,
"偲": 21220,
"僆": 21421,
"僞": 21471,
"僳": 21564,
"儁": 21422,
"儍": 21388,
"兗": 21368,
"冚": 21138,
"冧": 21137,
"凖": 21454,
"勷": 21522,
"卌": 21284,
"卽": 21186,
"厏": 21439,
"厓": 21449,
"厠": 21256,
"厹": 21285,
"吔": 21205,
"吲": 21403,
"吿": 21547,
"呑": 21331,
"呔": 21204,
"咃": 21533,
"咇": 21300,
"咼": 21565,
"哚": 21376,
"唂": 21402,
"唒": 21250,
"唓": 21401,
"唞": 21175,
"唥": 21144,
"唨": 21159,
"唪": 21146,
"唻": 21223,
"啋": 21428,
"啩": 21178,
"啹": 21482,
"喐": 21165,
"喥": 21316,
"喼": 21192,
"嗌": 21129,
"嗮": 21130,
"嗱": 21145,
"嘥": 21151,
"噃": 21197,
"噉": 21128,
"噏": 21170,
"噚": 21135,
"嚙": 21282,
"嚡": 21236,
"嚦": 21455,
"嚫": 21346,
"嚹": 21158,
"嚿": 21134,
"囇": 21612,
"囖": 21140,
"囘": 21504,
"坭": 21315,
"垻": 21538,
"埐": 21294,
"埞": 21180,
"埲": 21288,
"堊": 21309,
"塡": 21511,
"塱": 21187,
"塲": 21445,
"塹": 21481,
"奀": 21306,
"奭": 21492,
"妺": 21465,
"姵": 21536,
"娸": 21569,
"媺": 21431,
"嫗": 21311,
"嫰": 21323,
"嬋": 21400,
"嬲": 21131,
"孭": 21179,
"孲": 21210,
"孻": 21264,
"尐": 21157,
"尙": 21520,
"尢": 21619,
"屘": 21484,
"屙": 21160,
"岃": 21392,
"嶠": 21267,
"幗": 21269,
"幪": 21279,
"廡": 21530,
"廸": 21217,
"廻": 21479,
"彊": 21446,
"彖": 21335,
"徂": 21155,
"忟": 21301,
"惗": 21353,
"愃": 21527,
"愨": 21562,
"慇": 21603,
"慤": 21389,
"憓": 21477,
"戇": 21181,
"戙": 21281,
"戥": 21162,
"扤": 21541,
"扲": 21549,
"扺": 21293,
"抆": 21266,
"抌": 21258,
"抺": 21238,
"拃": 21188,
"拏": 21271,
"拕": 21476,
"挐": 21524,
"捽": 21212,
"掕": 21166,
"掗": 21486,
"掟": 21153,
"掹": 21214,
"揈": 21251,
"揞": 21429,
"揦": 21371,
"揼": 21184,
"揾": 21132,
"搣": 21222,
"搦": 21383,
"搲": 21317,
"搾": 21398,
"摑": 21268,
"摱": 21438,
"摷": 21209,
"撘": 21224,
"撣": 21615,
"撳": 21141,
"撾": 21183,
"擗": 21589,
"擧": 21521,
"擸": 21334,
"攆": 21544,
"攰": 21139,
"攷": 21270,
"旚": 21582,
"旯": 21280,
"旼": 21399,
"昃": 21483,
"昅": 21528,
"昪": 21377,
"昰": 21459,
"昺": 21380,
"暎": 21558,
"暪": 21437,
"曱": 21185,
"朏": 21557,
"朳": 21572,
"柙": 21551,
"栢": 21193,
"栱": 21581,
"梘": 21219,
"椏": 21385,
"椗": 21618,
"榘": 21560,
"榚": 21369,
"樋": 21601,
"樖": 21150,
"樨": 21475,
"樴": 21413,
"橛": 21156,
"檠": 21272,
"櫈": 21173,
"櫟": 21516,
"櫳": 21215,
"欏": 21500,
"殮": 21295,
"殻": 21207,
"氘": 21616,
"氚": 21574,
"氬": 21447,
"氼": 21329,
"沊": 21509,
"沔": 21552,
"沚": 21490,
"泂": 21461,
"涷": 21340,
"淥": 21235,
"淸": 21363,
"湉": 21443,
"湞": 21626,
"湴": 21407,
"滘": 21161,
"漖": 21627,
"潁": 21396,
"潯": 21241,
"澌": 21292,
"濰": 21394,
"濶": 21468,
"瀡": 21435,
"瀦": 21535,
"灃": 21625,
"灕": 21420,
"炆": 21172,
"炑": 21474,
"炘": 21621,
"烚": 21189,
"烴": 21229,
"焫": 21248,
"煇": 21227,
"煬": 21247,
"煱": 21347,
"燶": 21163,
"燾": 21386,
"牀": 21168,
"牘": 21600,
"猁": 21226,
"猢": 21609,
"猻": 21540,
"獌": 21198,
"獴": 21415,
"珓": 21370,
"琚": 21597,
"琤": 21393,
"琿": 21494,
"瑂": 21423,
"瑭": 21573,
"璘": 21555,
"璠": 21240,
"璣": 21299,
"璦": 21556,
"璩": 21508,
"瓘": 21554,
"瓚": 21318,
"甂": 21457,
"甑": 21539,
"甴": 21190,
"畧": 21322,
"畵": 21416,
"疎": 21460,
"疴": 21338,
"痲": 21576,
"痾": 21164,
"癆": 21503,
"癈": 21333,
"癗": 21433,
"癦": 21610,
"癩": 21410,
"睺": 21296,
"砬": 21568,
"砵": 21194,
"硃": 21570,
"硏": 21342,
"硤": 21201,
"礮": 21375,
"祆": 21472,
"祼": 21417,
"禕": 21542,
"禰": 21514,
"稈": 21367,
"穏": 21341,
"窰": 21230,
"竈": 21286,
"竉": 21289,
"竪": 21550,
"笪": 21147,
"筧": 21605,
"篋": 21359,
"簋": 21277,
"簒": 21418,
"粢": 21586,
"糉": 21263,
"糭": 21253,
"糴": 21425,
"紇": 21470,
"紥": 21252,
"綉": 21575,
"綟": 21260,
"綣": 21512,
"綷": 21441,
"緡": 21245,
"緲": 21517,
"縉": 21297,
"縹": 21587,
"繑": 21448,
"繙": 21246,
"缐": 21553,
"罅": 21191,
"罉": 21430,
"罟": 21324,
"羕": 21507,
"羶": 21378,
"翕": 21456,
"耖": 21390,
"肶": 21351,
"胐": 21332,
"脧": 21303,
"脷": 21148,
"腍": 21167,
"膥": 21228,
"膶": 21257,
"臏": 21566,
"舘": 21374,
"舢": 21563,
"舨": 21592,
"艶": 21593,
"苺": 21488,
"茘": 21624,
"菴": 21312,
"蒴": 21343,
"蓀": 21458,
"蔴": 21177,
"蕓": 21518,
"藪": 21302,
"蘄": 21613,
"蘅": 21478,
"蚧": 21579,
"蛺": 21391,
"蜑": 21358,
"蝻": 21259,
"螈": 21291,
"蟈": 21419,
"蟧": 21360,
"蟶": 21233,
"蠄": 21326,
"蠏": 21467,
"蠑": 21328,
"衊": 21469,
"裇": 21304,
"褦": 21221,
"褸": 21171,
"覈": 21290,
"覲": 21453,
"觜": 21496,
"訃": 21571,
"訌": 21412,
"訢": 21466,
"詏": 21244,
"詒": 21531,
"誒": 21152,
"謖": 21473,
"謚": 21237,
"謳": 21278,
"谿": 21580,
"豕": 21491,
"趷": 21424,
"跣": 21206,
"踎": 21202,
"踭": 21203,
"踼": 21404,
"躂": 21426,
"躄": 21195,
"躝": 21274,
"軚": 21196,
"軛": 21357,
"軫": 21349,
"軭": 21497,
"軻": 21434,
"輋": 21239,
"迾": 21325,
"逄": 21594,
"逑": 21611,
"逳": 21211,
"邴": 21310,
"郃": 21604,
"鄕": 21406,
"鄴": 21287,
"酎": 21546,
"釙": 21450,
"鈷": 21548,
"鈹": 21545,
"鉍": 21584,
"鉞": 21525,
"鉬": 21588,
"鉸": 21308,
"鉼": 21387,
"銥": 21462,
"銨": 21365,
"銫": 21440,
"銻": 21532,
"銼": 21432,
"鋇": 21614,
"鋯": 21590,
"錀": 21499,
"錒": 21498,
"錕": 21372,
"錡": 21199,
"鍔": 21515,
"鍚": 21273,
"鍠": 21261,
"鍬": 21379,
"鍶": 21344,
"鎅": 21339,
"鎘": 21529,
"鎢": 21362,
"鏇": 21397,
"鏐": 21585,
"鏝": 21218,
"鏵": 21444,
"鏹": 21502,
"鐡": 21505,
"鑌": 21452,
"鑭": 21567,
"閂": 21154,
"閆": 21463,
"閙": 21366,
"閤": 21381,
"閪": 21200,
"閭": 21409,
"闐": 21591,
"闓": 21442,
"靑": 21405,
"靭": 21262,
"靱": 21599,
"韃": 21242,
"韞": 21354,
"韮": 21254,
"頊": 21348,
"頴": 21327,
"顓": 21427,
"顥": 21411,
"顳": 21319,
"飮": 21561,
"餬": 21276,
"餸": 21133,
"饀": 21414,
"饉": 21534,
"馱": 21480,
"駖": 21356,
"駙": 21617,
"駟": 21489,
"駡": 21598,
"騫": 21314,
"騭": 21364,
"騮": 21174,
"騾": 21487,
"驃": 21336,
"驄": 21337,
"骹": 21355,
"髀": 21142,
"髹": 21350,
"鬅": 21213,
"鬈": 21408,
"鬩": 21543,
"鬭": 21495,
"鬲": 21320,
"魨": 21464,
"鮋": 21596,
"鮟": 21232,
"鮫": 21352,
"鯇": 21305,
"鯡": 21623,
"鯪": 21243,
"鯭": 21382,
"鰂": 21169,
"鰹": 21345,
"鱇": 21231,
"鱘": 21395,
"鱟": 21583,
"鱲": 21208,
"鳯": 21451,
"鴞": 21275,
"鴣": 21607,
"鴴": 21622,
"鵐": 21321,
"鵞": 21510,
"鵪": 21283,
"鶉": 21249,
"鶻": 21608,
"鶿": 21559,
"鷂": 21298,
"鷄": 21176,
"鷓": 21606,
"鷸": 21373,
"鸕": 21526,
"鸛": 21361,
"麪": 21149,
"麫": 21577,
"麿": 21225,
"黐": 21136,
"鼆": 21313,
"鼇": 21602,
"鼴": 21436,
"鼷": 21506,
"齲": 21519,
"齶": 21578,
"龑": 21523,
"龠": 21595,
"龢": 21513
}