sarvam-2b-v0.5 / tokenizer_config.json
rahular's picture
Update tokenizer_config.json
ce48fc5 verified
raw
history blame
24.6 kB
{
"add_bos_token": true,
"add_eos_token": false,
"add_prefix_space": true,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<<SYS>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"4": {
"content": "<</SYS>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"5": {
"content": "[INST]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"6": {
"content": "[/INST]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"7": {
"content": "<<reserved_token_1>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"8": {
"content": "<<reserved_token_2>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"9": {
"content": "<<reserved_token_3>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"10": {
"content": "<<reserved_token_4>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"11": {
"content": "<<reserved_token_5>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"12": {
"content": "<<reserved_token_6>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"13": {
"content": "<<reserved_token_7>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"14": {
"content": "<<reserved_token_8>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"15": {
"content": "<<reserved_token_9>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"16": {
"content": "<<reserved_token_10>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"17": {
"content": "<<reserved_token_11>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"18": {
"content": "<<reserved_token_12>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"19": {
"content": "<<reserved_token_13>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"20": {
"content": "<<reserved_token_14>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"21": {
"content": "<<reserved_token_15>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"22": {
"content": "<<reserved_token_16>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"23": {
"content": "<<reserved_token_17>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"24": {
"content": "<<reserved_token_18>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"25": {
"content": "<<reserved_token_19>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"26": {
"content": "<<reserved_token_20>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"27": {
"content": "<<reserved_token_21>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"28": {
"content": "<<reserved_token_22>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"29": {
"content": "<<reserved_token_23>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"30": {
"content": "<<reserved_token_24>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"31": {
"content": "<<reserved_token_25>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"32": {
"content": "<<reserved_token_26>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"33": {
"content": "<<reserved_token_27>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"34": {
"content": "<<reserved_token_28>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"35": {
"content": "<<reserved_token_29>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"36": {
"content": "<<reserved_token_30>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"37": {
"content": "<<reserved_token_31>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"38": {
"content": "<<reserved_token_32>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"39": {
"content": "<<reserved_token_33>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"40": {
"content": "<<reserved_token_34>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"41": {
"content": "<<reserved_token_35>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"42": {
"content": "<<reserved_token_36>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"43": {
"content": "<<reserved_token_37>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"44": {
"content": "<<reserved_token_38>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"45": {
"content": "<<reserved_token_39>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"46": {
"content": "<<reserved_token_40>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"47": {
"content": "<<reserved_token_41>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"48": {
"content": "<<reserved_token_42>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49": {
"content": "<<reserved_token_43>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"50": {
"content": "<<reserved_token_44>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"51": {
"content": "<<reserved_token_45>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"52": {
"content": "<<reserved_token_46>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"53": {
"content": "<<reserved_token_47>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"54": {
"content": "<<reserved_token_48>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"55": {
"content": "<<reserved_token_49>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"56": {
"content": "<<reserved_token_50>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"57": {
"content": "<<reserved_token_51>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"58": {
"content": "<<reserved_token_52>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"59": {
"content": "<<reserved_token_53>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"60": {
"content": "<<reserved_token_54>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"61": {
"content": "<<reserved_token_55>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"62": {
"content": "<<reserved_token_56>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"63": {
"content": "<<reserved_token_57>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"64": {
"content": "<<reserved_token_58>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"65": {
"content": "<<reserved_token_59>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"66": {
"content": "<<reserved_token_60>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"67": {
"content": "<<reserved_token_61>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"68": {
"content": "<<reserved_token_62>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"69": {
"content": "<<reserved_token_63>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"70": {
"content": "<<reserved_token_64>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"71": {
"content": "<<reserved_token_65>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"72": {
"content": "<<reserved_token_66>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"73": {
"content": "<<reserved_token_67>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"74": {
"content": "<<reserved_token_68>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"75": {
"content": "<<reserved_token_69>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"76": {
"content": "<<reserved_token_70>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"77": {
"content": "<<reserved_token_71>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"78": {
"content": "<<reserved_token_72>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"79": {
"content": "<<reserved_token_73>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"80": {
"content": "<<reserved_token_74>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"81": {
"content": "<<reserved_token_75>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"82": {
"content": "<<reserved_token_76>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"83": {
"content": "<<reserved_token_77>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"84": {
"content": "<<reserved_token_78>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"85": {
"content": "<<reserved_token_79>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"86": {
"content": "<<reserved_token_80>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"87": {
"content": "<<reserved_token_81>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"88": {
"content": "<<reserved_token_82>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"89": {
"content": "<<reserved_token_83>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"90": {
"content": "<<reserved_token_84>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"91": {
"content": "<<reserved_token_85>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"92": {
"content": "<<reserved_token_86>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"93": {
"content": "<<reserved_token_87>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"94": {
"content": "<<reserved_token_88>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"95": {
"content": "<<reserved_token_89>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"96": {
"content": "<<reserved_token_90>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"97": {
"content": "<<reserved_token_91>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"98": {
"content": "<<reserved_token_92>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"99": {
"content": "<<reserved_token_93>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100": {
"content": "<<reserved_token_94>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"101": {
"content": "<<reserved_token_95>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"102": {
"content": "<<reserved_token_96>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"103": {
"content": "<<reserved_token_97>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"104": {
"content": "<<reserved_token_98>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"105": {
"content": "<<reserved_token_99>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"106": {
"content": "<<reserved_token_100>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"107": {
"content": "<<reserved_token_101>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"108": {
"content": "<<reserved_token_102>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"109": {
"content": "<<reserved_token_103>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"110": {
"content": "<<reserved_token_104>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"111": {
"content": "<<reserved_token_105>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"112": {
"content": "<<reserved_token_106>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"113": {
"content": "<<reserved_token_107>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"114": {
"content": "<<reserved_token_108>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"115": {
"content": "<<reserved_token_109>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"116": {
"content": "<<reserved_token_110>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"117": {
"content": "<<reserved_token_111>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"118": {
"content": "<<reserved_token_112>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"119": {
"content": "<<reserved_token_113>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"120": {
"content": "<<reserved_token_114>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"121": {
"content": "<<reserved_token_115>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"122": {
"content": "<<reserved_token_116>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"123": {
"content": "<<reserved_token_117>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"124": {
"content": "<<reserved_token_118>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"125": {
"content": "<<reserved_token_119>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"126": {
"content": "<<reserved_token_120>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"127": {
"content": "<<reserved_token_121>>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"128": {
"content": "\r",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"129": {
"content": "\t",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"130": {
"content": "\n",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<s>",
"chat_template": ""
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": false,
"model_max_length": 1000000000000000019884624838656,
"pad_token": null,
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}