Yi-Ko-DUS-9B / tokenizer_config.json
beomi's picture
Upload tokenizer
62387a0 verified
raw
history blame
9.56 kB
{
"add_bos_token": true,
"add_eos_token": false,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<|startoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78413": {
"content": "<|sep|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"78414": {
"content": "<|acc|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"78415": {
"content": "<|rrn|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"78416": {
"content": "<|tel|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"78417": {
"content": "ㄱ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78418": {
"content": "ㄴ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78419": {
"content": "ㄷ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78420": {
"content": "ㄹ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78421": {
"content": "ㅁ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78422": {
"content": "ㅂ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78423": {
"content": "ㅅ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78424": {
"content": "ㅇ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78425": {
"content": "ㅈ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78426": {
"content": "ㅊ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78427": {
"content": "ㅋ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78428": {
"content": "ㅌ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78429": {
"content": "ㅍ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78430": {
"content": "ㅎ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78431": {
"content": "ㅏ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78432": {
"content": "ㅐ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78433": {
"content": "ㅑ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78434": {
"content": "ㅒ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78435": {
"content": "ㅓ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78436": {
"content": "ㅔ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78437": {
"content": "ㅕ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78438": {
"content": "ㅖ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78439": {
"content": "ㅗ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78440": {
"content": "ㅘ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78441": {
"content": "ㅙ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78442": {
"content": "ㅚ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78443": {
"content": "ㅛ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78444": {
"content": "ㅜ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78445": {
"content": "ㅝ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78446": {
"content": "ㅞ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78447": {
"content": "ㅟ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78448": {
"content": "ㅠ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78449": {
"content": "ㅡ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78450": {
"content": "ㅢ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78451": {
"content": "ㅣ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78452": {
"content": "ㄲ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78453": {
"content": "ㄳ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78454": {
"content": "ㄵ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78455": {
"content": "ㄶ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78456": {
"content": "ㄺ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78457": {
"content": "ㄻ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78458": {
"content": "ㄼ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78459": {
"content": "ㄽ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78460": {
"content": "ㄾ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78461": {
"content": "ㄿ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78462": {
"content": "ㅀ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"78463": {
"content": "ㅄ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<|startoftext|>",
"clean_up_tokenization_spaces": false,
"eos_token": "<|endoftext|>",
"fast": true,
"legacy": true,
"model_max_length": 4096,
"pad_token": "<unk>",
"sp_model_kwargs": {},
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": true
}