|
{
|
|
"add_bos_token": true,
|
|
"add_eos_token": false,
|
|
"added_tokens_decoder": {
|
|
"0": {
|
|
"content": "<pad>",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"1": {
|
|
"content": "<eos>",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"2": {
|
|
"content": "<bos>",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"3": {
|
|
"content": "<unk>",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"359": {
|
|
"content": "<0x8E>",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"362": {
|
|
"content": "<0x91>",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"366": {
|
|
"content": "<0x95>",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"375": {
|
|
"content": "<0x9E>",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"376": {
|
|
"content": "<0x9F>",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"383": {
|
|
"content": "<0xA6>",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"386": {
|
|
"content": "<0xA9>",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"388": {
|
|
"content": "<0xAB>",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"390": {
|
|
"content": "<0xAD>",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"391": {
|
|
"content": "<0xAE>",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"396": {
|
|
"content": "<0xB3>",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"442": {
|
|
"content": "<0xE1>",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"477": {
|
|
"content": "on",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"492": {
|
|
"content": "al",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"494": {
|
|
"content": "le",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"520": {
|
|
"content": "ic",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"527": {
|
|
"content": "ha",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"556": {
|
|
"content": "na",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"567": {
|
|
"content": "po",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"568": {
|
|
"content": "os",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"574": {
|
|
"content": "ing",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"576": {
|
|
"content": "▁of",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"590": {
|
|
"content": "▁I",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"609": {
|
|
"content": "ow",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"621": {
|
|
"content": "ati",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"639": {
|
|
"content": "and",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"738": {
|
|
"content": "ak",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"745": {
|
|
"content": "out",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"936": {
|
|
"content": "ill",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"956": {
|
|
"content": "no",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"1016": {
|
|
"content": "AT",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"1023": {
|
|
"content": "ob",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"1115": {
|
|
"content": "ical",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"1137": {
|
|
"content": "ins",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"1175": {
|
|
"content": "the",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"1282": {
|
|
"content": "ian",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"1314": {
|
|
"content": "ip",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"1471": {
|
|
"content": "ene",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"1725": {
|
|
"content": "US",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"2441": {
|
|
"content": "New",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"2483": {
|
|
"content": "oci",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"2716": {
|
|
"content": "IR",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"2988": {
|
|
"content": "hal",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"3596": {
|
|
"content": "AND",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"3642": {
|
|
"content": "……",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"4994": {
|
|
"content": "DB",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"5363": {
|
|
"content": "Mon",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"6477": {
|
|
"content": "bet",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"7552": {
|
|
"content": "▁Development",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"8032": {
|
|
"content": "Unit",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"8826": {
|
|
"content": "▁Product",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"9355": {
|
|
"content": "Ca",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"9998": {
|
|
"content": "▁Human",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"10284": {
|
|
"content": "AU",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"11122": {
|
|
"content": "Bet",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"11832": {
|
|
"content": "▁Index",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"13247": {
|
|
"content": "",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"13460": {
|
|
"content": "HD",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"13661": {
|
|
"content": "Pen",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"13741": {
|
|
"content": "CG",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"18638": {
|
|
"content": "Ya",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"30336": {
|
|
"content": "hea",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"30573": {
|
|
"content": "▁Gross",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"31238": {
|
|
"content": "Road",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"38551": {
|
|
"content": "▁GDP",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"45029": {
|
|
"content": "▁Domestic",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"53189": {
|
|
"content": "Republic",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"78918": {
|
|
"content": "Cep",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"82512": {
|
|
"content": "rish",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"83910": {
|
|
"content": "GDP",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"88255": {
|
|
"content": "acta",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"89844": {
|
|
"content": "Tho",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"101600": {
|
|
"content": "bap",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"140391": {
|
|
"content": "PEC",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"153185": {
|
|
"content": "▁UNDP",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"176345": {
|
|
"content": "្រ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"216267": {
|
|
"content": "dge",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"234815": {
|
|
"content": "Astronom",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235248": {
|
|
"content": "▁",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235249": {
|
|
"content": "e",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235250": {
|
|
"content": "a",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235251": {
|
|
"content": "t",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235252": {
|
|
"content": "i",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235253": {
|
|
"content": "o",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235254": {
|
|
"content": "n",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235255": {
|
|
"content": "r",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235256": {
|
|
"content": "s",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235257": {
|
|
"content": "l",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235258": {
|
|
"content": "d",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235259": {
|
|
"content": "h",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235260": {
|
|
"content": "c",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235261": {
|
|
"content": "u",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235262": {
|
|
"content": "m",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235263": {
|
|
"content": "p",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235264": {
|
|
"content": "g",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235265": {
|
|
"content": ".",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235266": {
|
|
"content": "f",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235267": {
|
|
"content": "y",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235268": {
|
|
"content": "b",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235269": {
|
|
"content": ",",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235271": {
|
|
"content": "w",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235272": {
|
|
"content": "v",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235273": {
|
|
"content": "k",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235274": {
|
|
"content": "1",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235275": {
|
|
"content": ")",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235276": {
|
|
"content": "0",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235277": {
|
|
"content": "S",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235278": {
|
|
"content": "(",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235279": {
|
|
"content": "T",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235280": {
|
|
"content": "A",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235281": {
|
|
"content": "\"",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235283": {
|
|
"content": "/",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235284": {
|
|
"content": "2",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235285": {
|
|
"content": "I",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235288": {
|
|
"content": "C",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235289": {
|
|
"content": ";",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235290": {
|
|
"content": "-",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235291": {
|
|
"content": "E",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235292": {
|
|
"content": ":",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235293": {
|
|
"content": "=",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235294": {
|
|
"content": "R",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235295": {
|
|
"content": "P",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235296": {
|
|
"content": "M",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235297": {
|
|
"content": "x",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235299": {
|
|
"content": "D",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235300": {
|
|
"content": "N",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235301": {
|
|
"content": "L",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235302": {
|
|
"content": "O",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235304": {
|
|
"content": "3",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235305": {
|
|
"content": "B",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235306": {
|
|
"content": "z",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235307": {
|
|
"content": "]",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235308": {
|
|
"content": "5",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235309": {
|
|
"content": "[",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235310": {
|
|
"content": "4",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235311": {
|
|
"content": "F",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235312": {
|
|
"content": "j",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235313": {
|
|
"content": ">",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235314": {
|
|
"content": "H",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235315": {
|
|
"content": "9",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235317": {
|
|
"content": "q",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235318": {
|
|
"content": "6",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235319": {
|
|
"content": "G",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235321": {
|
|
"content": "8",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235323": {
|
|
"content": "$",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235324": {
|
|
"content": "7",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235325": {
|
|
"content": "W",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235327": {
|
|
"content": "U",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235330": {
|
|
"content": "V",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235333": {
|
|
"content": "K",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235336": {
|
|
"content": "?",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235338": {
|
|
"content": "J",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235340": {
|
|
"content": "+",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235342": {
|
|
"content": "Y",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235348": {
|
|
"content": "@",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235349": {
|
|
"content": "’",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235356": {
|
|
"content": "X",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235358": {
|
|
"content": "%",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235366": {
|
|
"content": "“",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235368": {
|
|
"content": "Q",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235369": {
|
|
"content": "”",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235382": {
|
|
"content": "Z",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235389": {
|
|
"content": "–",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235417": {
|
|
"content": "…",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235421": {
|
|
"content": "",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235465": {
|
|
"content": ":",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235481": {
|
|
"content": "»",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235496": {
|
|
"content": "«",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235549": {
|
|
"content": "‘",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235600": {
|
|
"content": "π",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235611": {
|
|
"content": "·",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235657": {
|
|
"content": "°",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"235762": {
|
|
"content": "",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"236130": {
|
|
"content": "×",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"236156": {
|
|
"content": "→",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"236338": {
|
|
"content": " ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"236591": {
|
|
"content": "",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"236728": {
|
|
"content": "´",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"239275": {
|
|
"content": "÷",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"239532": {
|
|
"content": "្",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"239773": {
|
|
"content": "ា",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"239928": {
|
|
"content": "ន",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"240175": {
|
|
"content": "រ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"240620": {
|
|
"content": "ក",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"240755": {
|
|
"content": "ស",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"240970": {
|
|
"content": "ម",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"241020": {
|
|
"content": "ប",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"241202": {
|
|
"content": "ត",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"241294": {
|
|
"content": "ង",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"241477": {
|
|
"content": "ល",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"241750": {
|
|
"content": "ទ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"241842": {
|
|
"content": "ិ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"241997": {
|
|
"content": "ុ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"242058": {
|
|
"content": "ី",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"242181": {
|
|
"content": "យ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"242243": {
|
|
"content": "ព",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"242257": {
|
|
"content": "ំ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"242340": {
|
|
"content": "់",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"242536": {
|
|
"content": "វ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"242553": {
|
|
"content": "ដ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"242797": {
|
|
"content": "គ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"242820": {
|
|
"content": "ច",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"242843": {
|
|
"content": "េ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"242926": {
|
|
"content": "ូ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"243070": {
|
|
"content": "អ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"243084": {
|
|
"content": "ជ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"243106": {
|
|
"content": "ខ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"243409": {
|
|
"content": "ោ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"243505": {
|
|
"content": "ើ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"243580": {
|
|
"content": "ែ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"243679": {
|
|
"content": "៩",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"243952": {
|
|
"content": "ណ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"243953": {
|
|
"content": "១",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"243980": {
|
|
"content": "ះ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"244081": {
|
|
"content": "ហ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"244094": {
|
|
"content": "៖",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"244249": {
|
|
"content": "ភ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"244362": {
|
|
"content": "ញ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"244441": {
|
|
"content": "ថ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"244456": {
|
|
"content": "ធ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"244475": {
|
|
"content": "០",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"244519": {
|
|
"content": "ៅ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"244565": {
|
|
"content": "ួ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"245270": {
|
|
"content": "៉",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"245286": {
|
|
"content": "ផ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"245505": {
|
|
"content": "ៃ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"245574": {
|
|
"content": "ឹ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"245634": {
|
|
"content": "២",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"246122": {
|
|
"content": "៨",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"246261": {
|
|
"content": "័",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"246351": {
|
|
"content": "៦",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"246643": {
|
|
"content": "៥",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"246733": {
|
|
"content": "៊",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"246933": {
|
|
"content": "ឺ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"246934": {
|
|
"content": "ៀ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"246944": {
|
|
"content": "ឆ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"246945": {
|
|
"content": "ឡ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"247021": {
|
|
"content": "៣",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"247030": {
|
|
"content": "ៗ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"247219": {
|
|
"content": "ឌ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"247361": {
|
|
"content": "",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"247693": {
|
|
"content": "ឃ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"247694": {
|
|
"content": "ឋ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"247740": {
|
|
"content": "៍",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"247857": {
|
|
"content": "៌",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"247987": {
|
|
"content": "៤",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"248090": {
|
|
"content": "ឯ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"248637": {
|
|
"content": "៧",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"248666": {
|
|
"content": "ៈ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"248718": {
|
|
"content": "ឈ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"248733": {
|
|
"content": "ឥ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"249008": {
|
|
"content": "ឧ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"249082": {
|
|
"content": "ឱ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"249187": {
|
|
"content": "៏",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"249644": {
|
|
"content": "ឬ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"250581": {
|
|
"content": "ឪ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"250684": {
|
|
"content": "ឿ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"251966": {
|
|
"content": "ឲ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"252586": {
|
|
"content": "៛",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
},
|
|
"254823": {
|
|
"content": "ឍ",
|
|
"lstrip": false,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": false
|
|
}
|
|
},
|
|
"bos_token": "<bos>",
|
|
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<eos>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
"clean_up_tokenization_spaces": false,
|
|
"eos_token": "<eos>",
|
|
"legacy": null,
|
|
"model_max_length": 1000000000000000019884624838656,
|
|
"pad_token": "<eos>",
|
|
"sp_model_kwargs": {},
|
|
"spaces_between_special_tokens": false,
|
|
"tokenizer_class": "GemmaTokenizer",
|
|
"unk_token": "<unk>",
|
|
"use_default_system_prompt": false
|
|
}
|
|
|