|
{ |
|
"added_tokens_decoder": { |
|
"0": { |
|
"content": "+ุง", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"1": { |
|
"content": "+ู", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"2": { |
|
"content": "ุจ+", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"3": { |
|
"content": "+ูู
", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"4": { |
|
"content": "+ุงุช", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"5": { |
|
"content": "+ู", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"6": { |
|
"content": "ู+", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"7": { |
|
"content": "+ูู
ุง", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"8": { |
|
"content": "+ูุง", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"9": { |
|
"content": "+ู", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"10": { |
|
"content": "+ูุง", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"11": { |
|
"content": "+ูู
ุง", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"12": { |
|
"content": "+ุฉ", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"13": { |
|
"content": "ู+", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"14": { |
|
"content": "+ูู
", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"15": { |
|
"content": "+ูู", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"16": { |
|
"content": "+ุช", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"17": { |
|
"content": "[ุจุฑูุฏ]", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"18": { |
|
"content": "[ู
ุณุชุฎุฏู
]", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"19": { |
|
"content": "ูู+", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"20": { |
|
"content": "ุงู+", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"21": { |
|
"content": "[ุฑุงุจุท]", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"22": { |
|
"content": "ุณ+", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"23": { |
|
"content": "+ุงู", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"24": { |
|
"content": "+ูุง", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"25": { |
|
"content": "+ู", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"26": { |
|
"content": "+ูู", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"27": { |
|
"content": "+ูู", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"28": { |
|
"content": "+ูู", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"29": { |
|
"content": "ู+", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"30": { |
|
"content": "ู+", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": true, |
|
"special": true |
|
}, |
|
"31": { |
|
"content": "[PAD]", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"32": { |
|
"content": "[UNK]", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"33": { |
|
"content": "[CLS]", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"34": { |
|
"content": "[SEP]", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"35": { |
|
"content": "[MASK]", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
} |
|
}, |
|
"clean_up_tokenization_spaces": true, |
|
"cls_token": "[CLS]", |
|
"do_basic_tokenize": true, |
|
"do_lower_case": false, |
|
"mask_token": "[MASK]", |
|
"max_len": 512, |
|
"model_max_length": 512, |
|
"never_split": [ |
|
"+ู", |
|
"+ูู
ุง", |
|
"ู+", |
|
"+ูุง", |
|
"+ูู", |
|
"ู+", |
|
"+ูู", |
|
"+ุงู", |
|
"+ูู
", |
|
"+ุฉ", |
|
"[ุจุฑูุฏ]", |
|
"ูู+", |
|
"+ู", |
|
"+ุช", |
|
"+ู", |
|
"ุณ+", |
|
"ู+", |
|
"[ู
ุณุชุฎุฏู
]", |
|
"+ูู
", |
|
"+ุง", |
|
"ุจ+", |
|
"ู+", |
|
"+ูุง", |
|
"+ูุง", |
|
"+ูู", |
|
"+ูู
ุง", |
|
"ุงู+", |
|
"+ู", |
|
"+ูู", |
|
"+ุงุช", |
|
"[ุฑุงุจุท]" |
|
], |
|
"pad_token": "[PAD]", |
|
"sep_token": "[SEP]", |
|
"strip_accents": null, |
|
"tokenize_chinese_chars": true, |
|
"tokenizer_class": "BertTokenizer", |
|
"unk_token": "[UNK]" |
|
} |
|
|