SurMuy_v1_512512201 / tokenizer_config.json
AingHongsin's picture
Upload 7 files
572fcbc verified
{
"add_bos_token": true,
"add_eos_token": false,
"added_tokens_decoder": {
"0": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<eos>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<bos>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"359": {
"content": "<0x8E>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"362": {
"content": "<0x91>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"366": {
"content": "<0x95>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"375": {
"content": "<0x9E>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"376": {
"content": "<0x9F>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"383": {
"content": "<0xA6>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"386": {
"content": "<0xA9>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"388": {
"content": "<0xAB>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"390": {
"content": "<0xAD>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"391": {
"content": "<0xAE>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"396": {
"content": "<0xB3>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"442": {
"content": "<0xE1>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"477": {
"content": "on",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"492": {
"content": "al",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"494": {
"content": "le",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"520": {
"content": "ic",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"527": {
"content": "ha",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"556": {
"content": "na",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"567": {
"content": "po",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"568": {
"content": "os",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"574": {
"content": "ing",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"576": {
"content": "▁of",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"590": {
"content": "▁I",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"609": {
"content": "ow",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"621": {
"content": "ati",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"639": {
"content": "and",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"738": {
"content": "ak",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"745": {
"content": "out",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"936": {
"content": "ill",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"956": {
"content": "no",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1016": {
"content": "AT",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1023": {
"content": "ob",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1115": {
"content": "ical",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1137": {
"content": "ins",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1175": {
"content": "the",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1282": {
"content": "ian",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1314": {
"content": "ip",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1471": {
"content": "ene",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1725": {
"content": "US",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"2441": {
"content": "New",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"2483": {
"content": "oci",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"2716": {
"content": "IR",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"2988": {
"content": "hal",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"3596": {
"content": "AND",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"3642": {
"content": "……",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"4994": {
"content": "DB",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"5363": {
"content": "Mon",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"6477": {
"content": "bet",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"7552": {
"content": "▁Development",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"8032": {
"content": "Unit",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"8826": {
"content": "▁Product",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"9355": {
"content": "Ca",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"9998": {
"content": "▁Human",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10284": {
"content": "AU",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"11122": {
"content": "Bet",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"11832": {
"content": "▁Index",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"13247": {
"content": "​​",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"13460": {
"content": "HD",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"13661": {
"content": "Pen",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"13741": {
"content": "CG",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"18638": {
"content": "Ya",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30336": {
"content": "hea",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30573": {
"content": "▁Gross",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"31238": {
"content": "Road",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"38551": {
"content": "▁GDP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"45029": {
"content": "▁Domestic",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"53189": {
"content": "Republic",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"78918": {
"content": "Cep",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"82512": {
"content": "rish",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"83910": {
"content": "GDP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"88255": {
"content": "acta",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"89844": {
"content": "Tho",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"101600": {
"content": "bap",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"140391": {
"content": "PEC",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"153185": {
"content": "▁UNDP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"176345": {
"content": "្រ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"216267": {
"content": "dge",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"234815": {
"content": "Astronom",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235248": {
"content": "▁",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235249": {
"content": "e",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235250": {
"content": "a",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235251": {
"content": "t",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235252": {
"content": "i",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235253": {
"content": "o",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235254": {
"content": "n",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235255": {
"content": "r",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235256": {
"content": "s",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235257": {
"content": "l",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235258": {
"content": "d",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235259": {
"content": "h",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235260": {
"content": "c",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235261": {
"content": "u",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235262": {
"content": "m",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235263": {
"content": "p",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235264": {
"content": "g",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235265": {
"content": ".",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235266": {
"content": "f",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235267": {
"content": "y",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235268": {
"content": "b",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235269": {
"content": ",",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235271": {
"content": "w",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235272": {
"content": "v",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235273": {
"content": "k",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235274": {
"content": "1",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235275": {
"content": ")",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235276": {
"content": "0",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235277": {
"content": "S",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235278": {
"content": "(",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235279": {
"content": "T",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235280": {
"content": "A",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235281": {
"content": "\"",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235283": {
"content": "/",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235284": {
"content": "2",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235285": {
"content": "I",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235288": {
"content": "C",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235289": {
"content": ";",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235290": {
"content": "-",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235291": {
"content": "E",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235292": {
"content": ":",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235293": {
"content": "=",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235294": {
"content": "R",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235295": {
"content": "P",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235296": {
"content": "M",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235297": {
"content": "x",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235299": {
"content": "D",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235300": {
"content": "N",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235301": {
"content": "L",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235302": {
"content": "O",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235304": {
"content": "3",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235305": {
"content": "B",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235306": {
"content": "z",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235307": {
"content": "]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235308": {
"content": "5",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235309": {
"content": "[",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235310": {
"content": "4",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235311": {
"content": "F",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235312": {
"content": "j",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235313": {
"content": ">",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235314": {
"content": "H",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235315": {
"content": "9",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235317": {
"content": "q",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235318": {
"content": "6",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235319": {
"content": "G",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235321": {
"content": "8",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235323": {
"content": "$",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235324": {
"content": "7",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235325": {
"content": "W",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235327": {
"content": "U",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235330": {
"content": "V",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235333": {
"content": "K",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235336": {
"content": "?",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235338": {
"content": "J",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235340": {
"content": "+",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235342": {
"content": "Y",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235348": {
"content": "@",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235349": {
"content": "’",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235356": {
"content": "X",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235358": {
"content": "%",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235366": {
"content": "“",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235368": {
"content": "Q",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235369": {
"content": "”",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235382": {
"content": "Z",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235389": {
"content": "–",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235417": {
"content": "…",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235421": {
"content": "​",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235465": {
"content": ":",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235481": {
"content": "»",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235496": {
"content": "«",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235549": {
"content": "‘",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235600": {
"content": "π",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235611": {
"content": "·",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235657": {
"content": "°",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"235762": {
"content": "‌",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"236130": {
"content": "×",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"236156": {
"content": "→",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"236338": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"236591": {
"content": "­",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"236728": {
"content": "´",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"239275": {
"content": "÷",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"239532": {
"content": "្",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"239773": {
"content": "ា",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"239928": {
"content": "ន",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"240175": {
"content": "រ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"240620": {
"content": "ក",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"240755": {
"content": "ស",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"240970": {
"content": "ម",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"241020": {
"content": "ប",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"241202": {
"content": "ត",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"241294": {
"content": "ង",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"241477": {
"content": "ល",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"241750": {
"content": "ទ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"241842": {
"content": "ិ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"241997": {
"content": "ុ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"242058": {
"content": "ី",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"242181": {
"content": "យ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"242243": {
"content": "ព",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"242257": {
"content": "ំ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"242340": {
"content": "់",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"242536": {
"content": "វ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"242553": {
"content": "ដ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"242797": {
"content": "គ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"242820": {
"content": "ច",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"242843": {
"content": "េ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"242926": {
"content": "ូ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"243070": {
"content": "អ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"243084": {
"content": "ជ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"243106": {
"content": "ខ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"243409": {
"content": "ោ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"243505": {
"content": "ើ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"243580": {
"content": "ែ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"243679": {
"content": "៩",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"243952": {
"content": "ណ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"243953": {
"content": "១",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"243980": {
"content": "ះ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"244081": {
"content": "ហ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"244094": {
"content": "៖",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"244249": {
"content": "ភ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"244362": {
"content": "ញ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"244441": {
"content": "ថ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"244456": {
"content": "ធ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"244475": {
"content": "០",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"244519": {
"content": "ៅ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"244565": {
"content": "ួ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"245270": {
"content": "៉",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"245286": {
"content": "ផ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"245505": {
"content": "ៃ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"245574": {
"content": "ឹ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"245634": {
"content": "២",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"246122": {
"content": "៨",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"246261": {
"content": "័",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"246351": {
"content": "៦",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"246643": {
"content": "៥",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"246733": {
"content": "៊",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"246933": {
"content": "ឺ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"246934": {
"content": "ៀ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"246944": {
"content": "ឆ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"246945": {
"content": "ឡ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"247021": {
"content": "៣",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"247030": {
"content": "ៗ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"247219": {
"content": "ឌ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"247361": {
"content": "",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"247693": {
"content": "ឃ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"247694": {
"content": "ឋ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"247740": {
"content": "៍",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"247857": {
"content": "៌",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"247987": {
"content": "៤",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"248090": {
"content": "ឯ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"248637": {
"content": "៧",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"248666": {
"content": "ៈ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"248718": {
"content": "ឈ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"248733": {
"content": "ឥ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"249008": {
"content": "ឧ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"249082": {
"content": "ឱ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"249187": {
"content": "៏",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"249644": {
"content": "ឬ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"250581": {
"content": "ឪ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"250684": {
"content": "ឿ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"251966": {
"content": "ឲ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"252586": {
"content": "៛",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"254823": {
"content": "ឍ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<bos>",
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<eos>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
"clean_up_tokenization_spaces": false,
"eos_token": "<eos>",
"legacy": null,
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<eos>",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "GemmaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}