{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 1024, "strategy": "LongestFirst", "stride": 0 }, "padding": { "strategy": "BatchLongest", "direction": "Right", "pad_to_multiple_of": null, "pad_id": 1, "pad_type_id": 0, "pad_token": "<|PAD|>" }, "added_tokens": [ { "id": 0, "content": "<|EOS|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<|PAD|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "<|UNK|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": false, "use_regex": true }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "Unigram", "unk_id": 2, "vocab": [ [ "<|EOS|>", 0.0 ], [ "<|PAD|>", 0.0 ], [ "<|UNK|>", 0.0 ], [ "Ġ", -1.5260435611730827 ], [ "#", -2.7402009810912062 ], [ "Ã", -3.1166905774882707 ], [ "e", -3.2260840326309896 ], [ "a", -3.400272151145682 ], [ "o", -3.4247111498875125 ], [ "n", -3.4843012141328966 ], [ "t", -3.5851466950828534 ], [ "l", -3.5966397727159105 ], [ "s", -3.606519701447205 ], [ "v", -3.673389625897381 ], [ "Å", -3.738661921889596 ], [ "k", -3.88985675826423 ], [ "¡", -3.896324504449853 ], [ "m", -3.900019204288416 ], [ "d", -3.924142000578403 ], [ "i", -3.957515863848771 ], [ "u", -3.98427965340516 ], [ "Ń", -4.01885777375824 ], [ "1", -4.076782718271231 ], [ "r", -4.101788234787666 ], [ "h", -4.165955307243209 ], [ "Ä", -4.209926280752244 ], [ ",", -4.2650454975175265 ], [ "c", -4.2986020434927 ], [ "j", -4.299016883023702 ], [ "Ċ", -4.318158151676027 ], [ "p", -4.340959953844402 ], [ "z", -4.458983744584648 ], [ "y", -4.51828856430196 ], [ "Ľ", -4.625728639873191 ], [ "J", -4.632495838144841 ], [ "b", -4.702284800331416 ], [ "0", -4.89877498638786 ], [ "T", -4.930942842309104 ], [ "¾", -4.9475363370353715 ], [ "½", -5.112839051696183 ], [ "Ļ", -5.1232783823983485 ], [ "8", -5.165680149039558 ], [ "©", -5.1662964199731185 ], [ ".", -5.311316866688184 ], [ "į", -5.390915371298602 ], [ "X", -5.441727913115729 ], [ "A", -5.5452444992227115 ], [ "9", -5.700158070211225 ], [ "¯", -5.794693725474856 ], [ "B", -5.946768892374564 ], [ "N", -6.231750626230964 ], [ "!", -6.43383380299835 ], [ "2", -6.458671934150379 ], [ "7", -6.481553328401322 ], [ "6", -6.54739707067993 ], [ "¥", -7.119259719177025 ], [ "3", -7.1764773118059555 ], [ "4", -7.283631800958723 ], [ "?", -7.301649946289903 ], [ "5", -7.427897072063589 ], [ "D", -7.4577806463041565 ], [ "Ī", -7.530995391628789 ], [ "C", -7.594917277581869 ], [ "ı", -7.619961419829636 ], [ "º", -7.878503588175192 ], [ "f", -7.964820780829486 ], [ "g", -8.277706569992713 ], [ "³", -8.34045981977625 ], [ "H", -10.272558688607978 ], [ "Y", -10.559397898241222 ], [ "x", -10.567971957971364 ], [ "w", -11.220052439924926 ], [ "q", -12.08126913846109 ], [ "P", -12.337988972147176 ], [ "¶", -12.928795639896862 ], [ "ĩ", -12.979709743960514 ], [ "¼", -13.307792559933008 ], [ "¤", -13.509243173516644 ], [ "â", -13.57425863274756 ], [ "§", -13.57425863274756 ], [ "Ģ", -13.59274912950999 ], [ "ļ", -13.677077762284064 ], [ "¨", -13.925495429629638 ], [ "´", -14.030614768906831 ], [ "«", -14.15899565226756 ], [ "ł", -14.358448819224304 ], [ "¢", -14.479162057573522 ], [ "Â", -14.660728142957538 ], [ "ª", -14.775686233662904 ], [ "®", -14.905597093757487 ], [ "»", -15.23056985336876 ], [ "Ĥ", -15.296708147458816 ], [ "+", -16.127004563448008 ], [ "¦", -16.252149490984237 ], [ "Ħ", -16.691400414639208 ], [ "²", -17.225105598348073 ], [ "=", -17.350105598357175 ], [ "Ĵ", -17.659629407920946 ], [ "Ł", -18.109629407920945 ], [ "/", -18.109629407920945 ], [ "¹", -18.44296274125428 ], [ "`", -18.44296274125428 ], [ "Ĭ", -18.942962741214338 ], [ "ĵ", -18.94296274125428 ], [ "ħ", -18.94296274125428 ], [ "Ï", -18.94296274125428 ] ], "byte_fallback": false } }