chemma-2b / tokenizer_config.json
yerevann's picture
Upload tokenizer
3af5931 verified
raw
history blame
13.6 kB
{
"add_bos_token": false,
"add_eos_token": false,
"added_tokens_decoder": {
"0": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<eos>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<bos>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"7": {
"content": "[START_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"8": {
"content": "[END_SMILES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"9": {
"content": "[FORMULA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"10": {
"content": "[SYNONYM]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"11": {
"content": "[RELATED]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"12": {
"content": "[SIMILAR]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"13": {
"content": "[PROPERTY]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"14": {
"content": "[SAS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"15": {
"content": "[WEIGHT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"16": {
"content": "[TPSA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"17": {
"content": "[CLOGP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"18": {
"content": "[QED]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"19": {
"content": "[NUMHDONORS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"20": {
"content": "[NUMHACCEPTORS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"21": {
"content": "[NUMHETEROATOMS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"22": {
"content": "[NUMROTATABLEBONDS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"23": {
"content": "[NOCOUNT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"24": {
"content": "[NHOHCOUNT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"25": {
"content": "[RINGCOUNT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"26": {
"content": "[HEAVYATOMCOUNT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"27": {
"content": "[FRACTIONCSP3]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"28": {
"content": "[NUMAROMATICRINGS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"29": {
"content": "[NUMSATURATEDRINGS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"30": {
"content": "[NUMAROMATICHETEROCYCLES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"31": {
"content": "[NUMAROMATICCARBOCYCLES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"32": {
"content": "[NUMSATURATEDHETEROCYCLES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"33": {
"content": "[NUMSATURATEDCARBOCYCLES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"34": {
"content": "[NUMALIPHATICRINGS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"35": {
"content": "[NUMALIPHATICHETEROCYCLES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"36": {
"content": "[NUMALIPHATICCARBOCYCLES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"37": {
"content": "[IUPAC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"38": {
"content": "[VAR_NAME]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"39": {
"content": "[VAR_DESC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"40": {
"content": "[VAR_VAL]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"41": {
"content": "[ASSAY_NAME]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"42": {
"content": "[ASSAY_DESC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"43": {
"content": "[/FORMULA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"44": {
"content": "[/SYNONYM]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"45": {
"content": "[/RELATED]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"46": {
"content": "[/SIMILAR]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"47": {
"content": "[/PROPERTY]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"48": {
"content": "[/SAS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49": {
"content": "[/WEIGHT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"50": {
"content": "[/TPSA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"51": {
"content": "[/CLOGP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"52": {
"content": "[/QED]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"53": {
"content": "[/NUMHDONORS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"54": {
"content": "[/NUMHACCEPTORS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"55": {
"content": "[/NUMHETEROATOMS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"56": {
"content": "[/NUMROTATABLEBONDS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"57": {
"content": "[/NOCOUNT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"58": {
"content": "[/NHOHCOUNT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"59": {
"content": "[/RINGCOUNT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"60": {
"content": "[/HEAVYATOMCOUNT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"61": {
"content": "[/FRACTIONCSP3]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"62": {
"content": "[/NUMAROMATICRINGS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"63": {
"content": "[/NUMSATURATEDRINGS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"64": {
"content": "[/NUMAROMATICHETEROCYCLES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"65": {
"content": "[/NUMAROMATICCARBOCYCLES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"66": {
"content": "[/NUMSATURATEDHETEROCYCLES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"67": {
"content": "[/NUMSATURATEDCARBOCYCLES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"68": {
"content": "[/NUMALIPHATICRINGS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"69": {
"content": "[/NUMALIPHATICHETEROCYCLES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"70": {
"content": "[/NUMALIPHATICCARBOCYCLES]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"71": {
"content": "[/IUPAC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"72": {
"content": "[/VAR_NAME]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"73": {
"content": "[/VAR_DESC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"74": {
"content": "[/VAR_VAL]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"75": {
"content": "[/ASSAY_NAME]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"76": {
"content": "[/ASSAY_DESC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<bos>",
"clean_up_tokenization_spaces": false,
"eos_token": "<eos>",
"legacy": null,
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<pad>",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "GemmaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}