haes95 committed on
Commit
32ebdf8
•
1 Parent(s): 2715ec8

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -19,7 +19,7 @@
19
  "rstrip": false,
20
  "single_word": false
21
  },
22
- "pad_token": "</s>",
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
 
19
  "rstrip": false,
20
  "single_word": false
21
  },
22
+ "pad_token": "<unk>",
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
tokenizer.json CHANGED
@@ -2,11 +2,20 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 1024,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
- "padding": null,
 
 
 
 
 
 
 
 
 
10
  "added_tokens": [
11
  {
12
  "id": 0,
@@ -103,6 +112,12 @@
103
  "id": "A",
104
  "type_id": 0
105
  }
 
 
 
 
 
 
106
  }
107
  ],
108
  "pair": [
@@ -118,6 +133,12 @@
118
  "type_id": 0
119
  }
120
  },
 
 
 
 
 
 
121
  {
122
  "SpecialToken": {
123
  "id": "<s>",
@@ -129,9 +150,24 @@
129
  "id": "B",
130
  "type_id": 1
131
  }
 
 
 
 
 
 
132
  }
133
  ],
134
  "special_tokens": {
 
 
 
 
 
 
 
 
 
135
  "<s>": {
136
  "id": "<s>",
137
  "ids": [
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 691,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 691
12
+ },
13
+ "direction": "Left",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 0,
16
+ "pad_type_id": 0,
17
+ "pad_token": "<unk>"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
112
  "id": "A",
113
  "type_id": 0
114
  }
115
+ },
116
+ {
117
+ "SpecialToken": {
118
+ "id": "</s>",
119
+ "type_id": 0
120
+ }
121
  }
122
  ],
123
  "pair": [
 
133
  "type_id": 0
134
  }
135
  },
136
+ {
137
+ "SpecialToken": {
138
+ "id": "</s>",
139
+ "type_id": 0
140
+ }
141
+ },
142
  {
143
  "SpecialToken": {
144
  "id": "<s>",
 
150
  "id": "B",
151
  "type_id": 1
152
  }
153
+ },
154
+ {
155
+ "SpecialToken": {
156
+ "id": "</s>",
157
+ "type_id": 1
158
+ }
159
  }
160
  ],
161
  "special_tokens": {
162
+ "</s>": {
163
+ "id": "</s>",
164
+ "ids": [
165
+ 2
166
+ ],
167
+ "tokens": [
168
+ "</s>"
169
+ ]
170
+ },
171
  "<s>": {
172
  "id": "<s>",
173
  "ids": [
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "add_bos_token": true,
3
- "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -74,7 +74,7 @@
74
  "legacy": null,
75
  "middle_token": "▁<MID>",
76
  "model_max_length": 1000000000000000019884624838656,
77
- "pad_token": "</s>",
78
  "prefix_token": "▁<PRE>",
79
  "sp_model_kwargs": {},
80
  "suffix_token": "▁<SUF>",
 
1
  {
2
  "add_bos_token": true,
3
+ "add_eos_token": true,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
74
  "legacy": null,
75
  "middle_token": "▁<MID>",
76
  "model_max_length": 1000000000000000019884624838656,
77
+ "pad_token": "<unk>",
78
  "prefix_token": "▁<PRE>",
79
  "sp_model_kwargs": {},
80
  "suffix_token": "▁<SUF>",