calculator-6m / tokenizer.json
georgiyozhegov's picture
Upload tokenizer
833fb7f verified
raw
history blame
8.08 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<sos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<eos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "find",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "step",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6,
"content": "answer",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128,
"content": "<bos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Metaspace",
"replacement": "▁",
"prepend_scheme": "always",
"split": true
},
"post_processor": null,
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"prepend_scheme": "always",
"split": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<sos>": 0,
"<eos>": 1,
"<pad>": 2,
"<unk>": 3,
"find": 4,
"step": 5,
"answer": 6,
"\n": 7,
"(": 8,
")": 9,
"*": 10,
"+": 11,
"-": 12,
".": 13,
"/": 14,
"0": 15,
"1": 16,
"2": 17,
"3": 18,
"4": 19,
"5": 20,
"6": 21,
"7": 22,
"8": 23,
"9": 24,
"=": 25,
"a": 26,
"d": 27,
"e": 28,
"f": 29,
"i": 30,
"n": 31,
"o": 32,
"p": 33,
"r": 34,
"s": 35,
"t": 36,
"w": 37,
"▁": 38,
"▁-": 39,
"\ns": 40,
"ep": 41,
"tep": 42,
"▁=": 43,
"\nstep": 44,
"▁1": 45,
"0.": 46,
"er": 47,
"▁*": 48,
"\na": 49,
"fi": 50,
"nd": 51,
"ns": 52,
"wer": 53,
"▁fi": 54,
"\nans": 55,
"▁find": 56,
"\nanswer": 57,
"▁+": 58,
"▁/": 59,
"▁-1": 60,
"▁2": 61,
"▁3": 62,
"▁0.": 63,
"▁4": 64,
"▁5": 65,
"▁6": 66,
"▁7": 67,
"▁8": 68,
"▁9": 69,
"▁-2": 70,
"▁-3": 71,
"▁-0.": 72,
"▁-4": 73,
"66": 74,
"▁-5": 75,
"33": 76,
".5": 77,
"▁-7": 78,
".2": 79,
"▁-8": 80,
"▁-6": 81,
".1": 82,
"▁-9": 83,
".8": 84,
".4": 85,
"75": 86,
"25": 87,
"71": 88,
"99": 89,
".6": 90,
".3": 91,
"28": 92,
"76": 93,
"74": 94,
"78": 95,
"11": 96,
"00": 97,
"95": 98,
"24": 99,
"73": 100,
"94": 101,
"96": 102,
"98": 103,
"23": 104,
"04": 105,
"08": 106,
"26": 107,
"21": 108,
"05": 109,
"93": 110,
"91": 111,
"2\nstep": 112,
"03": 113,
"06": 114,
"01": 115,
"0\nstep": 116,
"44": 117,
"88": 118,
"55": 119,
"77": 120,
"15": 121,
"16": 122,
"18": 123,
"14": 124,
"13": 125,
"7\nstep": 126,
"▁(": 127
},
"merges": [
[
"▁",
"-"
],
[
"\n",
"s"
],
[
"e",
"p"
],
[
"t",
"ep"
],
[
"▁",
"="
],
[
"\ns",
"tep"
],
[
"▁",
"1"
],
[
"0",
"."
],
[
"e",
"r"
],
[
"▁",
"*"
],
[
"\n",
"a"
],
[
"f",
"i"
],
[
"n",
"d"
],
[
"n",
"s"
],
[
"w",
"er"
],
[
"▁",
"fi"
],
[
"\na",
"ns"
],
[
"▁fi",
"nd"
],
[
"\nans",
"wer"
],
[
"▁",
"+"
],
[
"▁",
"/"
],
[
"▁-",
"1"
],
[
"▁",
"2"
],
[
"▁",
"3"
],
[
"▁",
"0."
],
[
"▁",
"4"
],
[
"▁",
"5"
],
[
"▁",
"6"
],
[
"▁",
"7"
],
[
"▁",
"8"
],
[
"▁",
"9"
],
[
"▁-",
"2"
],
[
"▁-",
"3"
],
[
"▁-",
"0."
],
[
"▁-",
"4"
],
[
"6",
"6"
],
[
"▁-",
"5"
],
[
"3",
"3"
],
[
".",
"5"
],
[
"▁-",
"7"
],
[
".",
"2"
],
[
"▁-",
"8"
],
[
"▁-",
"6"
],
[
".",
"1"
],
[
"▁-",
"9"
],
[
".",
"8"
],
[
".",
"4"
],
[
"7",
"5"
],
[
"2",
"5"
],
[
"7",
"1"
],
[
"9",
"9"
],
[
".",
"6"
],
[
".",
"3"
],
[
"2",
"8"
],
[
"7",
"6"
],
[
"7",
"4"
],
[
"7",
"8"
],
[
"1",
"1"
],
[
"0",
"0"
],
[
"9",
"5"
],
[
"2",
"4"
],
[
"7",
"3"
],
[
"9",
"4"
],
[
"9",
"6"
],
[
"9",
"8"
],
[
"2",
"3"
],
[
"0",
"4"
],
[
"0",
"8"
],
[
"2",
"6"
],
[
"2",
"1"
],
[
"0",
"5"
],
[
"9",
"3"
],
[
"9",
"1"
],
[
"2",
"\nstep"
],
[
"0",
"3"
],
[
"0",
"6"
],
[
"0",
"1"
],
[
"0",
"\nstep"
],
[
"4",
"4"
],
[
"8",
"8"
],
[
"5",
"5"
],
[
"7",
"7"
],
[
"1",
"5"
],
[
"1",
"6"
],
[
"1",
"8"
],
[
"1",
"4"
],
[
"1",
"3"
],
[
"7",
"\nstep"
],
[
"▁",
"("
]
]
}
}