soldni committed on
Commit
8571ec7
1 Parent(s): 54042e6

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +73 -23
  2. tokenizer_config.json +47 -23
tokenizer.json CHANGED
@@ -22,7 +22,7 @@
22
  "special": true
23
  },
24
  {
25
- "id": 50257,
26
  "content": " ",
27
  "single_word": false,
28
  "lstrip": false,
@@ -31,7 +31,7 @@
31
  "special": false
32
  },
33
  {
34
- "id": 50258,
35
  "content": " ",
36
  "single_word": false,
37
  "lstrip": false,
@@ -40,7 +40,7 @@
40
  "special": false
41
  },
42
  {
43
- "id": 50259,
44
  "content": " ",
45
  "single_word": false,
46
  "lstrip": false,
@@ -49,7 +49,7 @@
49
  "special": false
50
  },
51
  {
52
- "id": 50260,
53
  "content": " ",
54
  "single_word": false,
55
  "lstrip": false,
@@ -58,7 +58,7 @@
58
  "special": false
59
  },
60
  {
61
- "id": 50261,
62
  "content": " ",
63
  "single_word": false,
64
  "lstrip": false,
@@ -67,7 +67,7 @@
67
  "special": false
68
  },
69
  {
70
- "id": 50262,
71
  "content": " ",
72
  "single_word": false,
73
  "lstrip": false,
@@ -76,7 +76,7 @@
76
  "special": false
77
  },
78
  {
79
- "id": 50263,
80
  "content": " ",
81
  "single_word": false,
82
  "lstrip": false,
@@ -85,7 +85,7 @@
85
  "special": false
86
  },
87
  {
88
- "id": 50264,
89
  "content": " ",
90
  "single_word": false,
91
  "lstrip": false,
@@ -94,7 +94,7 @@
94
  "special": false
95
  },
96
  {
97
- "id": 50265,
98
  "content": " ",
99
  "single_word": false,
100
  "lstrip": false,
@@ -103,7 +103,7 @@
103
  "special": false
104
  },
105
  {
106
- "id": 50266,
107
  "content": " ",
108
  "single_word": false,
109
  "lstrip": false,
@@ -112,7 +112,7 @@
112
  "special": false
113
  },
114
  {
115
- "id": 50267,
116
  "content": " ",
117
  "single_word": false,
118
  "lstrip": false,
@@ -121,7 +121,7 @@
121
  "special": false
122
  },
123
  {
124
- "id": 50268,
125
  "content": " ",
126
  "single_word": false,
127
  "lstrip": false,
@@ -130,7 +130,7 @@
130
  "special": false
131
  },
132
  {
133
- "id": 50269,
134
  "content": " ",
135
  "single_word": false,
136
  "lstrip": false,
@@ -139,7 +139,7 @@
139
  "special": false
140
  },
141
  {
142
- "id": 50270,
143
  "content": " ",
144
  "single_word": false,
145
  "lstrip": false,
@@ -148,7 +148,7 @@
148
  "special": false
149
  },
150
  {
151
- "id": 50271,
152
  "content": " ",
153
  "single_word": false,
154
  "lstrip": false,
@@ -157,7 +157,7 @@
157
  "special": false
158
  },
159
  {
160
- "id": 50272,
161
  "content": " ",
162
  "single_word": false,
163
  "lstrip": false,
@@ -166,7 +166,7 @@
166
  "special": false
167
  },
168
  {
169
- "id": 50273,
170
  "content": " ",
171
  "single_word": false,
172
  "lstrip": false,
@@ -175,7 +175,7 @@
175
  "special": false
176
  },
177
  {
178
- "id": 50274,
179
  "content": " ",
180
  "single_word": false,
181
  "lstrip": false,
@@ -184,7 +184,7 @@
184
  "special": false
185
  },
186
  {
187
- "id": 50275,
188
  "content": " ",
189
  "single_word": false,
190
  "lstrip": false,
@@ -193,7 +193,7 @@
193
  "special": false
194
  },
195
  {
196
- "id": 50276,
197
  "content": " ",
198
  "single_word": false,
199
  "lstrip": false,
@@ -201,6 +201,33 @@
201
  "normalized": true,
202
  "special": false
203
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  {
205
  "id": 50277,
206
  "content": "|||EMAIL_ADDRESS|||",
@@ -221,12 +248,12 @@
221
  },
222
  {
223
  "id": 50279,
224
- "content": " ",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
- "normalized": true,
229
- "special": false
230
  }
231
  ],
232
  "normalizer": {
@@ -50513,6 +50540,29 @@
50513
  "Ġfortified": 50251,
50514
  "ferenced": 50252,
50515
  "ĠOutcomes": 50253,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50516
  "|||EMAIL_ADDRESS|||": 50277,
50517
  "|||PHONE_NUMBER|||": 50278,
50518
  "<|endoftext|>": 50279
 
22
  "special": true
23
  },
24
  {
25
+ "id": 50254,
26
  "content": " ",
27
  "single_word": false,
28
  "lstrip": false,
 
31
  "special": false
32
  },
33
  {
34
+ "id": 50255,
35
  "content": " ",
36
  "single_word": false,
37
  "lstrip": false,
 
40
  "special": false
41
  },
42
  {
43
+ "id": 50256,
44
  "content": " ",
45
  "single_word": false,
46
  "lstrip": false,
 
49
  "special": false
50
  },
51
  {
52
+ "id": 50257,
53
  "content": " ",
54
  "single_word": false,
55
  "lstrip": false,
 
58
  "special": false
59
  },
60
  {
61
+ "id": 50258,
62
  "content": " ",
63
  "single_word": false,
64
  "lstrip": false,
 
67
  "special": false
68
  },
69
  {
70
+ "id": 50259,
71
  "content": " ",
72
  "single_word": false,
73
  "lstrip": false,
 
76
  "special": false
77
  },
78
  {
79
+ "id": 50260,
80
  "content": " ",
81
  "single_word": false,
82
  "lstrip": false,
 
85
  "special": false
86
  },
87
  {
88
+ "id": 50261,
89
  "content": " ",
90
  "single_word": false,
91
  "lstrip": false,
 
94
  "special": false
95
  },
96
  {
97
+ "id": 50262,
98
  "content": " ",
99
  "single_word": false,
100
  "lstrip": false,
 
103
  "special": false
104
  },
105
  {
106
+ "id": 50263,
107
  "content": " ",
108
  "single_word": false,
109
  "lstrip": false,
 
112
  "special": false
113
  },
114
  {
115
+ "id": 50264,
116
  "content": " ",
117
  "single_word": false,
118
  "lstrip": false,
 
121
  "special": false
122
  },
123
  {
124
+ "id": 50265,
125
  "content": " ",
126
  "single_word": false,
127
  "lstrip": false,
 
130
  "special": false
131
  },
132
  {
133
+ "id": 50266,
134
  "content": " ",
135
  "single_word": false,
136
  "lstrip": false,
 
139
  "special": false
140
  },
141
  {
142
+ "id": 50267,
143
  "content": " ",
144
  "single_word": false,
145
  "lstrip": false,
 
148
  "special": false
149
  },
150
  {
151
+ "id": 50268,
152
  "content": " ",
153
  "single_word": false,
154
  "lstrip": false,
 
157
  "special": false
158
  },
159
  {
160
+ "id": 50269,
161
  "content": " ",
162
  "single_word": false,
163
  "lstrip": false,
 
166
  "special": false
167
  },
168
  {
169
+ "id": 50270,
170
  "content": " ",
171
  "single_word": false,
172
  "lstrip": false,
 
175
  "special": false
176
  },
177
  {
178
+ "id": 50271,
179
  "content": " ",
180
  "single_word": false,
181
  "lstrip": false,
 
184
  "special": false
185
  },
186
  {
187
+ "id": 50272,
188
  "content": " ",
189
  "single_word": false,
190
  "lstrip": false,
 
193
  "special": false
194
  },
195
  {
196
+ "id": 50273,
197
  "content": " ",
198
  "single_word": false,
199
  "lstrip": false,
 
201
  "normalized": true,
202
  "special": false
203
  },
204
+ {
205
+ "id": 50274,
206
+ "content": " ",
207
+ "single_word": false,
208
+ "lstrip": false,
209
+ "rstrip": false,
210
+ "normalized": true,
211
+ "special": false
212
+ },
213
+ {
214
+ "id": 50275,
215
+ "content": " ",
216
+ "single_word": false,
217
+ "lstrip": false,
218
+ "rstrip": false,
219
+ "normalized": true,
220
+ "special": false
221
+ },
222
+ {
223
+ "id": 50276,
224
+ "content": " ",
225
+ "single_word": false,
226
+ "lstrip": false,
227
+ "rstrip": false,
228
+ "normalized": true,
229
+ "special": false
230
+ },
231
  {
232
  "id": 50277,
233
  "content": "|||EMAIL_ADDRESS|||",
 
248
  },
249
  {
250
  "id": 50279,
251
+ "content": "<|endoftext|>",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
+ "normalized": false,
256
+ "special": true
257
  }
258
  ],
259
  "normalizer": {
 
50540
  "Ġfortified": 50251,
50541
  "ferenced": 50252,
50542
  "ĠOutcomes": 50253,
50543
+ " ": 50254,
50544
+ " ": 50255,
50545
+ " ": 50256,
50546
+ " ": 50257,
50547
+ " ": 50258,
50548
+ " ": 50259,
50549
+ " ": 50260,
50550
+ " ": 50261,
50551
+ " ": 50262,
50552
+ " ": 50263,
50553
+ " ": 50264,
50554
+ " ": 50265,
50555
+ " ": 50266,
50556
+ " ": 50267,
50557
+ " ": 50268,
50558
+ " ": 50269,
50559
+ " ": 50270,
50560
+ " ": 50271,
50561
+ " ": 50272,
50562
+ " ": 50273,
50563
+ " ": 50274,
50564
+ " ": 50275,
50565
+ " ": 50276,
50566
  "|||EMAIL_ADDRESS|||": 50277,
50567
  "|||PHONE_NUMBER|||": 50278,
50568
  "<|endoftext|>": 50279
tokenizer_config.json CHANGED
@@ -17,7 +17,7 @@
17
  "single_word": false,
18
  "special": true
19
  },
20
- "50257": {
21
  "content": " ",
22
  "lstrip": false,
23
  "normalized": true,
@@ -25,7 +25,7 @@
25
  "single_word": false,
26
  "special": false
27
  },
28
- "50258": {
29
  "content": " ",
30
  "lstrip": false,
31
  "normalized": true,
@@ -33,7 +33,7 @@
33
  "single_word": false,
34
  "special": false
35
  },
36
- "50259": {
37
  "content": " ",
38
  "lstrip": false,
39
  "normalized": true,
@@ -41,7 +41,7 @@
41
  "single_word": false,
42
  "special": false
43
  },
44
- "50260": {
45
  "content": " ",
46
  "lstrip": false,
47
  "normalized": true,
@@ -49,7 +49,7 @@
49
  "single_word": false,
50
  "special": false
51
  },
52
- "50261": {
53
  "content": " ",
54
  "lstrip": false,
55
  "normalized": true,
@@ -57,7 +57,7 @@
57
  "single_word": false,
58
  "special": false
59
  },
60
- "50262": {
61
  "content": " ",
62
  "lstrip": false,
63
  "normalized": true,
@@ -65,7 +65,7 @@
65
  "single_word": false,
66
  "special": false
67
  },
68
- "50263": {
69
  "content": " ",
70
  "lstrip": false,
71
  "normalized": true,
@@ -73,7 +73,7 @@
73
  "single_word": false,
74
  "special": false
75
  },
76
- "50264": {
77
  "content": " ",
78
  "lstrip": false,
79
  "normalized": true,
@@ -81,7 +81,7 @@
81
  "single_word": false,
82
  "special": false
83
  },
84
- "50265": {
85
  "content": " ",
86
  "lstrip": false,
87
  "normalized": true,
@@ -89,7 +89,7 @@
89
  "single_word": false,
90
  "special": false
91
  },
92
- "50266": {
93
  "content": " ",
94
  "lstrip": false,
95
  "normalized": true,
@@ -97,7 +97,7 @@
97
  "single_word": false,
98
  "special": false
99
  },
100
- "50267": {
101
  "content": " ",
102
  "lstrip": false,
103
  "normalized": true,
@@ -105,7 +105,7 @@
105
  "single_word": false,
106
  "special": false
107
  },
108
- "50268": {
109
  "content": " ",
110
  "lstrip": false,
111
  "normalized": true,
@@ -113,7 +113,7 @@
113
  "single_word": false,
114
  "special": false
115
  },
116
- "50269": {
117
  "content": " ",
118
  "lstrip": false,
119
  "normalized": true,
@@ -121,7 +121,7 @@
121
  "single_word": false,
122
  "special": false
123
  },
124
- "50270": {
125
  "content": " ",
126
  "lstrip": false,
127
  "normalized": true,
@@ -129,7 +129,7 @@
129
  "single_word": false,
130
  "special": false
131
  },
132
- "50271": {
133
  "content": " ",
134
  "lstrip": false,
135
  "normalized": true,
@@ -137,7 +137,7 @@
137
  "single_word": false,
138
  "special": false
139
  },
140
- "50272": {
141
  "content": " ",
142
  "lstrip": false,
143
  "normalized": true,
@@ -145,7 +145,7 @@
145
  "single_word": false,
146
  "special": false
147
  },
148
- "50273": {
149
  "content": " ",
150
  "lstrip": false,
151
  "normalized": true,
@@ -153,7 +153,7 @@
153
  "single_word": false,
154
  "special": false
155
  },
156
- "50274": {
157
  "content": " ",
158
  "lstrip": false,
159
  "normalized": true,
@@ -161,7 +161,7 @@
161
  "single_word": false,
162
  "special": false
163
  },
164
- "50275": {
165
  "content": " ",
166
  "lstrip": false,
167
  "normalized": true,
@@ -169,7 +169,7 @@
169
  "single_word": false,
170
  "special": false
171
  },
172
- "50276": {
173
  "content": " ",
174
  "lstrip": false,
175
  "normalized": true,
@@ -177,6 +177,30 @@
177
  "single_word": false,
178
  "special": false
179
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  "50277": {
181
  "content": "|||EMAIL_ADDRESS|||",
182
  "lstrip": false,
@@ -194,12 +218,12 @@
194
  "special": false
195
  },
196
  "50279": {
197
- "content": " ",
198
  "lstrip": false,
199
- "normalized": true,
200
  "rstrip": false,
201
  "single_word": false,
202
- "special": false
203
  }
204
  },
205
  "additional_special_tokens": [],
 
17
  "single_word": false,
18
  "special": true
19
  },
20
+ "50254": {
21
  "content": " ",
22
  "lstrip": false,
23
  "normalized": true,
 
25
  "single_word": false,
26
  "special": false
27
  },
28
+ "50255": {
29
  "content": " ",
30
  "lstrip": false,
31
  "normalized": true,
 
33
  "single_word": false,
34
  "special": false
35
  },
36
+ "50256": {
37
  "content": " ",
38
  "lstrip": false,
39
  "normalized": true,
 
41
  "single_word": false,
42
  "special": false
43
  },
44
+ "50257": {
45
  "content": " ",
46
  "lstrip": false,
47
  "normalized": true,
 
49
  "single_word": false,
50
  "special": false
51
  },
52
+ "50258": {
53
  "content": " ",
54
  "lstrip": false,
55
  "normalized": true,
 
57
  "single_word": false,
58
  "special": false
59
  },
60
+ "50259": {
61
  "content": " ",
62
  "lstrip": false,
63
  "normalized": true,
 
65
  "single_word": false,
66
  "special": false
67
  },
68
+ "50260": {
69
  "content": " ",
70
  "lstrip": false,
71
  "normalized": true,
 
73
  "single_word": false,
74
  "special": false
75
  },
76
+ "50261": {
77
  "content": " ",
78
  "lstrip": false,
79
  "normalized": true,
 
81
  "single_word": false,
82
  "special": false
83
  },
84
+ "50262": {
85
  "content": " ",
86
  "lstrip": false,
87
  "normalized": true,
 
89
  "single_word": false,
90
  "special": false
91
  },
92
+ "50263": {
93
  "content": " ",
94
  "lstrip": false,
95
  "normalized": true,
 
97
  "single_word": false,
98
  "special": false
99
  },
100
+ "50264": {
101
  "content": " ",
102
  "lstrip": false,
103
  "normalized": true,
 
105
  "single_word": false,
106
  "special": false
107
  },
108
+ "50265": {
109
  "content": " ",
110
  "lstrip": false,
111
  "normalized": true,
 
113
  "single_word": false,
114
  "special": false
115
  },
116
+ "50266": {
117
  "content": " ",
118
  "lstrip": false,
119
  "normalized": true,
 
121
  "single_word": false,
122
  "special": false
123
  },
124
+ "50267": {
125
  "content": " ",
126
  "lstrip": false,
127
  "normalized": true,
 
129
  "single_word": false,
130
  "special": false
131
  },
132
+ "50268": {
133
  "content": " ",
134
  "lstrip": false,
135
  "normalized": true,
 
137
  "single_word": false,
138
  "special": false
139
  },
140
+ "50269": {
141
  "content": " ",
142
  "lstrip": false,
143
  "normalized": true,
 
145
  "single_word": false,
146
  "special": false
147
  },
148
+ "50270": {
149
  "content": " ",
150
  "lstrip": false,
151
  "normalized": true,
 
153
  "single_word": false,
154
  "special": false
155
  },
156
+ "50271": {
157
  "content": " ",
158
  "lstrip": false,
159
  "normalized": true,
 
161
  "single_word": false,
162
  "special": false
163
  },
164
+ "50272": {
165
  "content": " ",
166
  "lstrip": false,
167
  "normalized": true,
 
169
  "single_word": false,
170
  "special": false
171
  },
172
+ "50273": {
173
  "content": " ",
174
  "lstrip": false,
175
  "normalized": true,
 
177
  "single_word": false,
178
  "special": false
179
  },
180
+ "50274": {
181
+ "content": " ",
182
+ "lstrip": false,
183
+ "normalized": true,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": false
187
+ },
188
+ "50275": {
189
+ "content": " ",
190
+ "lstrip": false,
191
+ "normalized": true,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": false
195
+ },
196
+ "50276": {
197
+ "content": " ",
198
+ "lstrip": false,
199
+ "normalized": true,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": false
203
+ },
204
  "50277": {
205
  "content": "|||EMAIL_ADDRESS|||",
206
  "lstrip": false,
 
218
  "special": false
219
  },
220
  "50279": {
221
+ "content": "<|endoftext|>",
222
  "lstrip": false,
223
+ "normalized": false,
224
  "rstrip": false,
225
  "single_word": false,
226
+ "special": true
227
  }
228
  },
229
  "additional_special_tokens": [],