suzgunmirac committed on
Commit
8078f5d
1 Parent(s): 3362270

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"C25F": 32451, "G03D": 32111, "B67B": 32144, "D07B": 32414, "C07D": 32556, "C11C": 32274, "B63B": 32134, "G03B": 32237, "C12R": 32153, "C09H": 32119, "E05D": 32718, "D06M": 32362, "C21B": 32595, "C21D": 32392, "G10G": 32303, "A46B": 32288, "G21G": 32474, "C06F": 32221, "B60K": 32617, "H05G": 32348, "B27F": 32179, "B42B": 32588, "B03C": 32337, "C05D": 32368, "B61D": 32587, "F01B": 32708, "C12S": 32495, "G01R": 32442, "G07C": 32141, "A62D": 32292, "E21F": 32201, "E01C": 32677, "H03K": 32332, "A21D": 32650, "F03G": 32138, "H02J": 32329, "C12C": 32319, "A47B": 32218, "F23D": 32467, "F24F": 32149, "D02H": 32155, "F23J": 32287, "A01M": 32233, "E01H": 32692, "C08H": 32665, "E04B": 32193, "G21F": 32251, "E02D": 32458, "G21H": 32304, "H02G": 32449, "G10K": 32186, "A23P": 32402, "B08B": 32515, "B07C": 32191, "A47L": 32566, "B60J": 32220, "G04F": 32714, "A24B": 32691, "D01H": 32424, "B21B": 32468, "B61C": 32701, "A45B": 32733, "B23P": 32235, "C22C": 32320, "D03D": 32420, "A61J": 32662, "G05G": 32544, "D21G": 32489, "F02P": 32561, "B23Q": 32466, "A23K": 32591, "G21B": 32457, "A61N": 32422, "B25J": 32227, "F41H": 32311, "B25C": 32152, "D06N": 32518, "B81B": 32106, "B64C": 32271, "C10F": 32519, "H03B": 32194, "B01D": 32335, "A61G": 32443, "A01D": 32498, "A63G": 32364, "G09F": 32724, "F16H": 32539, "G10F": 32300, "F02D": 32427, "G06T": 32520, "B27L": 32248, "F21Y": 32666, "A23C": 32357, "G05B": 32209, "F04F": 32639, "B22D": 32590, "E21B": 32510, "F02F": 32552, "F41A": 32711, "F16G": 32136, "E05G": 32707, "B27H": 32307, "E05F": 32453, "B63J": 32660, "G01T": 32480, "C04B": 32688, "D01C": 32207, "H02P": 32649, "F27B": 32536, "F02K": 32705, "G03F": 32377, "G01N": 32635, "D06G": 32463, "B31B": 32721, "C30B": 32496, "F24T": 32603, "C25D": 32291, "B65B": 32668, "C10L": 32265, "C06B": 32583, "B41F": 32412, "G06K": 32222, "B26F": 32299, "F24H": 32177, "H01K": 32704, "B62L": 32565, "B01J": 32344, "E03F": 32183, "B24B": 32128, "C11D": 32452, "B43K": 32129, "B27K": 32720, "C01F": 32231, 
"H05C": 32212, "D04B": 32547, "B60L": 32413, "C12H": 32272, "B21C": 32255, "B42D": 32256, "F28D": 32445, "C01G": 32281, "B24D": 32389, "B31F": 32433, "F42B": 32606, "B44D": 32675, "B60H": 32371, "A01B": 32394, "B67D": 32327, "B63H": 32722, "H04M": 32600, "G10D": 32418, "A01H": 32461, "G04G": 32181, "F01M": 32471, "H01F": 32161, "F16D": 32571, "F23B": 32148, "A41H": 32642, "E03D": 32419, "E01D": 32575, "A01P": 32180, "E04C": 32476, "G07B": 32586, "C09B": 32726, "C08J": 32624, "A41D": 32190, "D02J": 32573, "E03B": 32250, "H01R": 32356, "TITLE": 32737, "H05K": 32594, "B60W": 32270, "G09C": 32213, "B62B": 32647, "F16J": 32296, "B23F": 32236, "G07D": 32554, "C09D": 32178, "B62K": 32157, "B41D": 32469, "F04C": 32535, "F01D": 32659, "G21D": 32447, "A43C": 32648, "A42B": 32244, "G01L": 32460, "G01W": 32415, "A44B": 32261, "A46D": 32567, "B41K": 32658, "B44F": 32314, "C13C": 32184, "C02F": 32584, "G02C": 32486, "A21C": 32626, "G06F": 32638, "C09C": 32540, "B60S": 32393, "A42C": 32249, "H02H": 32618, "G01C": 32488, "B82Y": 32625, "B21F": 32219, "F23M": 32313, "H04H": 32426, "F41J": 32380, "C05F": 32616, "F16N": 32725, "G06G": 32406, "H02S": 32525, "G05F": 32333, "A47C": 32343, "B60B": 32408, "A63F": 32696, "B60F": 32464, "B23H": 32246, "B44B": 32258, "G08G": 32736, "G09G": 32459, "F24B": 32627, "F16F": 32326, "F21K": 32417, "E02C": 32125, "A22B": 32644, "B68C": 32397, "H04K": 32386, "C09K": 32135, "A23J": 32336, "G01V": 32254, "H02K": 32188, "B64B": 32322, "H03J": 32359, "B42F": 32200, "B21G": 32728, "F01P": 32604, "CLAIMS": 32738, "B27N": 32124, "A01F": 32508, "B27M": 32446, "A63K": 32398, "B23G": 32710, "D04C": 32341, "H01P": 32475, "C10J": 32162, "B26B": 32532, "F03H": 32260, "C03B": 32182, "H03F": 32401, "C07C": 32470, "B68F": 32107, "G06E": 32601, "B26D": 32388, "F17D": 32350, "B65H": 32253, "A63C": 32317, "B64G": 32533, "B27C": 32353, "G10B": 32202, "H01Q": 32487, "D02G": 32531, "A61C": 32109, "F24C": 32224, "B22C": 32103, "E99Z": 32104, "B28B": 32640, "B25F": 32621, 
"D21B": 32282, "C12M": 32477, "F27D": 32633, "B21D": 32607, "C05G": 32491, "F04D": 32110, "B27D": 32105, "G04D": 32198, "A61M": 32632, "B23D": 32416, "B63G": 32347, "G01D": 32286, "F01C": 32120, "D01D": 32577, "A61Q": 32679, "H03L": 32576, "F41B": 32285, "F04B": 32661, "F25C": 32551, "B01L": 32361, "A63H": 32358, "B21J": 32429, "B66C": 32160, "F28F": 32543, "F28B": 32492, "B67C": 32100, "C14C": 32437, "B31C": 32456, "C07G": 32570, "C06D": 32546, "G02F": 32243, "B41B": 32582, "F17B": 32150, "F02B": 32365, "A43B": 32436, "F41C": 32512, "G06J": 32269, "F23L": 32732, "D21J": 32694, "C13K": 32225, "C05B": 32195, "C23G": 32264, "F21V": 32352, "C05C": 32211, "B66D": 32524, "F16K": 32423, "A01L": 32513, "D21C": 32127, "G06C": 32310, "B02C": 32700, "E06C": 32379, "C12Q": 32555, "E21C": 32137, "B04C": 32259, "G21K": 32563, "A41C": 32641, "H03M": 32681, "B25D": 32716, "B09B": 32273, "A61P": 32306, "B03B": 32143, "D06B": 32514, "G04B": 32174, "B02B": 32568, "F16B": 32653, "F23R": 32629, "F41F": 32305, "E21D": 32615, "A61L": 32112, "B21K": 32400, "C07F": 32507, "F17C": 32276, "H05F": 32678, "B42C": 32560, "B27J": 32612, "F25J": 32448, "B29D": 32206, "F26B": 32670, "A47F": 32558, "A23L": 32295, "G06N": 32342, "B61J": 32168, "G10H": 32214, "C09G": 32654, "B25H": 32187, "C23C": 32165, "B41L": 32676, "A62B": 32695, "G07G": 32387, "B23K": 32391, "D04H": 32297, "A47J": 32511, "B61K": 32605, "A23D": 32572, "C23D": 32312, "C12F": 32210, "A45D": 32139, "G01K": 32482, "A61B": 32349, "F02N": 32689, "D01B": 32727, "A01C": 32339, "D21D": 32574, "B28C": 32434, "G10L": 32381, "F28C": 32245, "C10M": 32596, "B60P": 32538, "C13D": 32609, "B61H": 32717, "G09D": 32146, "C03C": 32630, "B28D": 32173, "D04G": 32294, "A61F": 32275, "F15C": 32403, "D21F": 32611, "D03C": 32121, "G03C": 32521, "B01B": 32163, "B23C": 32170, "A01J": 32159, "B41G": 32430, "A24F": 32324, "C14B": 32425, "C10K": 32652, "A41B": 32192, "B62C": 32646, "F25B": 32440, "G07F": 32481, "G10C": 32289, "H02M": 32506, "A47H": 32505, 
"B68G": 32686, "A21B": 32262, "G02B": 32411, "B21L": 32199, "G01F": 32730, "B43M": 32331, "C13J": 32375, "C07K": 32683, "B04B": 32579, "G08B": 32147, "F01K": 32197, "F16M": 32330, "D05B": 32114, "F23G": 32328, "C25C": 32592, "H04J": 32280, "D03J": 32545, "D05C": 32321, "A22C": 32346, "F24V": 32409, "B41M": 32585, "D06L": 32703, "B32B": 32435, "B23B": 32428, "H05B": 32376, "F28G": 32338, "B65G": 32340, "B61G": 32325, "F02M": 32242, "D06J": 32450, "B05B": 32712, "B60N": 32715, "G05D": 32315, "A61D": 32598, "B62H": 32610, "A01N": 32517, "B27G": 32283, "H04W": 32682, "F16C": 32537, "D04D": 32500, "A23F": 32462, "B66F": 32367, "E01F": 32238, "B24C": 32204, "E04D": 32504, "C12N": 32374, "B66B": 32267, "B22F": 32410, "A47K": 32530, "F41G": 32115, "B64F": 32485, "G21C": 32263, "C06C": 32645, "F03C": 32493, "G03H": 32697, "B62D": 32723, "F02G": 32108, "F16L": 32473, "G01H": 32318, "H01T": 32140, "B65F": 32279, "F02C": 32241, "F15B": 32383, "B31D": 32699, "C08C": 32729, "F25D": 32145, "A63J": 32169, "B33Y": 32623, "A45C": 32404, "C40B": 32569, "A23B": 32441, "C01B": 32527, "D06F": 32355, "A61K": 32655, "C23F": 32189, "F22G": 32478, "B81C": 32643, "A45F": 32628, "E02F": 32619, "A63B": 32673, "A47G": 32684, "F22D": 32185, "F16T": 32247, "B27B": 32483, "C01C": 32293, "B41C": 32257, "F15D": 32101, "C12J": 32484, "B62M": 32550, "H05H": 32693, "B61F": 32674, "B60Q": 32122, "H01M": 32614, "B29C": 32354, "A24C": 32302, "H03C": 32366, "B30B": 32126, "H04B": 32702, "F21S": 32268, "C11B": 32432, "F24J": 32151, "G09B": 32266, "B82B": 32240, "E05B": 32656, "G06M": 32444, "G11B": 32133, "G01P": 32158, "C08B": 32438, "C22B": 32557, "G12B": 32589, "B06B": 32229, "E02B": 32669, "B60C": 32369, "F21L": 32548, "F42C": 32439, "B60R": 32599, "B64D": 32559, "G11C": 32323, "F24S": 32360, "B03D": 32382, "F24D": 32526, "D06Q": 32454, "C07B": 32516, "A61H": 32490, "A41G": 32196, "A23G": 32167, "B61B": 32334, "C10G": 32373, "A43D": 32564, "H04R": 32345, "B44C": 32713, "G08C": 32502, "B60G": 32680, 
"B60M": 32407, "A01G": 32384, "B43L": 32455, "F23H": 32472, "B62J": 32636, "C08K": 32399, "H01B": 32370, "C22F": 32123, "G01S": 32719, "F01N": 32385, "F99Z": 32672, "A24D": 32622, "B01F": 32230, "D06C": 32562, "A23N": 32390, "H01H": 32131, "B60V": 32580, "E04H": 32494, "H04S": 32553, "C10H": 32217, "D21H": 32706, "G01B": 32132, "A41F": 32734, "A47D": 32581, "B09C": 32613, "F01L": 32142, "F16P": 32351, "C08F": 32298, "F21W": 32176, "H01C": 32309, "B05D": 32164, "B07B": 32175, "H03D": 32239, "C08L": 32431, "C10B": 32657, "B61L": 32667, "C25B": 32216, "B65D": 32631, "F23Q": 32118, "F42D": 32226, "G04C": 32301, "B63C": 32234, "B05C": 32637, "C99Z": 32620, "H02B": 32634, "H01G": 32608, "A62C": 32171, "D01G": 32154, "E06B": 32503, "F23K": 32499, "C12P": 32731, "F22B": 32156, "A63D": 32130, "F23C": 32534, "B25G": 32117, "G06Q": 32509, "C21C": 32228, "F16S": 32290, "C13F": 32421, "H99Z": 32541, "B29B": 32203, "C08G": 32735, "A44C": 32690, "B68B": 32102, "F23N": 32252, "D06P": 32223, "H01L": 32602, "D01F": 32671, "H01J": 32698, "C12G": 32522, "B41N": 32113, "H03H": 32578, "C09J": 32232, "G01Q": 32405, "G03G": 32709, "H03G": 32208, "F03B": 32316, "C07H": 32549, "C13B": 32664, "G01M": 32528, "F03D": 32523, "E03C": 32597, "B65C": 32465, "E04G": 32542, "B60D": 32308, "H04L": 32172, "B21H": 32593, "G01J": 32501, "C07J": 32378, "E01B": 32651, "E04F": 32205, "A01K": 32166, "G01G": 32529, "C09F": 32277, "H01S": 32663, "G16H": 32116, "H04N": 32497, "C01D": 32363, "E05C": 32284, "H02N": 32685, "C10C": 32687, "D06H": 32215, "B41J": 32395, "H04Q": 32372, "B25B": 32278, "G04R": 32479, "B60T": 32396}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "sep_token": "<pad>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "/nlp/scr/msuzgun/nlp-fundamentals/t5-small", "sp_model_kwargs": {}, "tokenizer_class": "T5Tokenizer"}