{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 128, "strategy": "LongestFirst", "stride": 0 }, "padding": null, "added_tokens": [ { "id": 0, "content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Lowercase" }, "pre_tokenizer": { "type": "Split", "pattern": { "String": "" }, "behavior": "Removed", "invert": false }, "post_processor": { "type": "RobertaProcessing", "sep": [ "</s>", 2 ], "cls": [ "<s>", 0 ], "trim_offsets": true, "add_prefix_space": false }, "decoder": null, "model": { "type": "WordLevel", "vocab": { "<s>": 0, "<pad>": 1, "</s>": 2, "<unk>": 3, "<mask>": 4, "e": 5, "a": 6, "s": 7, "i": 8, "r": 9, "n": 10, "AH0": 11, "o": 12, "N": 13, "t": 14, "l": 15, "S": 16, "L": 17, "T": 18, "R": 19, "K": 20, "c": 21, "d": 22, "D": 23, "u": 24, "IH0": 25, "m": 26, "M": 27, "Z": 28, "h": 29, "g": 30, "p": 31, "ER0": 32, "IY0": 33, "b": 34, "B": 35, "P": 36, "EH1": 37, "AE1": 38, "AA1": 39, "y": 40, "k": 41, "IH1": 42, "F": 43, "f": 44, "G": 45, "w": 46, "V": 47, "v": 48, "NG": 49, "'": 50, "IY1": 51, "EY1": 52, "HH": 53, "W": 54, "SH": 55, "OW1": 56, "AO1": 57, "OW0": 58, "AH1": 59, "UW1": 60, "AY1": 61, "JH": 62, "z": 63, "CH": 64, "Y": 65, "AA0": 66, "ER1": 67, "EH2": 68, "IH2": 69, "TH": 70, "AY2": 71, "AE2": 72, "EY2": 73, "AA2": 74, "EH0": 75, "j": 76, "AW1": 77, "OW2": 78, "x": 79, "IY2": 80, "UW0": 81, "AO2": 82, "UH1": 83, "AE0": 84, "q": 85, "AO0": 86, "AH2": 87, "UW2": 88, "AY0": 89, "OY1": 90, "-": 
91, "EY0": 92, "DH": 93, "AW2": 94, "ER2": 95, "ZH": 96, "UH2": 97, "AW0": 98, "UH0": 99, "OY2": 100, "OY0": 101, ".": 102 }, "unk_token": "<unk>" } }