relik-ie committed on
Commit
f1b719d
1 Parent(s): 04f4497

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,42 +1,41 @@
1
  {
2
- "--COREF--": 128001,
3
- "--NME--": 128027,
4
- "[E-0]": 128028,
5
- "[E-10]": 128038,
6
- "[E-11]": 128039,
7
- "[E-1]": 128029,
8
- "[E-2]": 128030,
9
- "[E-3]": 128031,
10
- "[E-4]": 128032,
11
- "[E-5]": 128033,
12
- "[E-6]": 128034,
13
- "[E-7]": 128035,
14
- "[E-8]": 128036,
15
- "[E-9]": 128037,
16
  "[MASK]": 128000,
17
- "[R-0]": 128002,
18
- "[R-10]": 128012,
19
- "[R-11]": 128013,
20
- "[R-12]": 128014,
21
- "[R-13]": 128015,
22
- "[R-14]": 128016,
23
- "[R-15]": 128017,
24
- "[R-16]": 128018,
25
- "[R-17]": 128019,
26
- "[R-18]": 128020,
27
- "[R-19]": 128021,
28
- "[R-1]": 128003,
29
- "[R-20]": 128022,
30
- "[R-21]": 128023,
31
- "[R-22]": 128024,
32
- "[R-23]": 128025,
33
- "[R-24]": 128026,
34
- "[R-2]": 128004,
35
- "[R-3]": 128005,
36
- "[R-4]": 128006,
37
- "[R-5]": 128007,
38
- "[R-6]": 128008,
39
- "[R-7]": 128009,
40
- "[R-8]": 128010,
41
- "[R-9]": 128011
42
  }
 
1
  {
2
+ "--NME--": 128026,
3
+ "[E-0]": 128027,
4
+ "[E-10]": 128037,
5
+ "[E-11]": 128038,
6
+ "[E-1]": 128028,
7
+ "[E-2]": 128029,
8
+ "[E-3]": 128030,
9
+ "[E-4]": 128031,
10
+ "[E-5]": 128032,
11
+ "[E-6]": 128033,
12
+ "[E-7]": 128034,
13
+ "[E-8]": 128035,
14
+ "[E-9]": 128036,
 
15
  "[MASK]": 128000,
16
+ "[R-0]": 128001,
17
+ "[R-10]": 128011,
18
+ "[R-11]": 128012,
19
+ "[R-12]": 128013,
20
+ "[R-13]": 128014,
21
+ "[R-14]": 128015,
22
+ "[R-15]": 128016,
23
+ "[R-16]": 128017,
24
+ "[R-17]": 128018,
25
+ "[R-18]": 128019,
26
+ "[R-19]": 128020,
27
+ "[R-1]": 128002,
28
+ "[R-20]": 128021,
29
+ "[R-21]": 128022,
30
+ "[R-22]": 128023,
31
+ "[R-23]": 128024,
32
+ "[R-24]": 128025,
33
+ "[R-2]": 128003,
34
+ "[R-3]": 128004,
35
+ "[R-4]": 128005,
36
+ "[R-5]": 128006,
37
+ "[R-6]": 128007,
38
+ "[R-7]": 128008,
39
+ "[R-8]": 128009,
40
+ "[R-9]": 128010
41
  }
special_tokens_map.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "additional_special_tokens": [
3
- "--COREF--",
4
  "[R-0]",
5
  "[R-1]",
6
  "[R-2]",
 
1
  {
2
  "additional_special_tokens": [
 
3
  "[R-0]",
4
  "[R-1]",
5
  "[R-2]",
tokenizer.json CHANGED
@@ -50,15 +50,6 @@
50
  },
51
  {
52
  "id": 128001,
53
- "content": "--COREF--",
54
- "single_word": false,
55
- "lstrip": false,
56
- "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
- },
60
- {
61
- "id": 128002,
62
  "content": "[R-0]",
63
  "single_word": false,
64
  "lstrip": false,
@@ -67,7 +58,7 @@
67
  "special": true
68
  },
69
  {
70
- "id": 128003,
71
  "content": "[R-1]",
72
  "single_word": false,
73
  "lstrip": false,
@@ -76,7 +67,7 @@
76
  "special": true
77
  },
78
  {
79
- "id": 128004,
80
  "content": "[R-2]",
81
  "single_word": false,
82
  "lstrip": false,
@@ -85,7 +76,7 @@
85
  "special": true
86
  },
87
  {
88
- "id": 128005,
89
  "content": "[R-3]",
90
  "single_word": false,
91
  "lstrip": false,
@@ -94,7 +85,7 @@
94
  "special": true
95
  },
96
  {
97
- "id": 128006,
98
  "content": "[R-4]",
99
  "single_word": false,
100
  "lstrip": false,
@@ -103,7 +94,7 @@
103
  "special": true
104
  },
105
  {
106
- "id": 128007,
107
  "content": "[R-5]",
108
  "single_word": false,
109
  "lstrip": false,
@@ -112,7 +103,7 @@
112
  "special": true
113
  },
114
  {
115
- "id": 128008,
116
  "content": "[R-6]",
117
  "single_word": false,
118
  "lstrip": false,
@@ -121,7 +112,7 @@
121
  "special": true
122
  },
123
  {
124
- "id": 128009,
125
  "content": "[R-7]",
126
  "single_word": false,
127
  "lstrip": false,
@@ -130,7 +121,7 @@
130
  "special": true
131
  },
132
  {
133
- "id": 128010,
134
  "content": "[R-8]",
135
  "single_word": false,
136
  "lstrip": false,
@@ -139,7 +130,7 @@
139
  "special": true
140
  },
141
  {
142
- "id": 128011,
143
  "content": "[R-9]",
144
  "single_word": false,
145
  "lstrip": false,
@@ -148,7 +139,7 @@
148
  "special": true
149
  },
150
  {
151
- "id": 128012,
152
  "content": "[R-10]",
153
  "single_word": false,
154
  "lstrip": false,
@@ -157,7 +148,7 @@
157
  "special": true
158
  },
159
  {
160
- "id": 128013,
161
  "content": "[R-11]",
162
  "single_word": false,
163
  "lstrip": false,
@@ -166,7 +157,7 @@
166
  "special": true
167
  },
168
  {
169
- "id": 128014,
170
  "content": "[R-12]",
171
  "single_word": false,
172
  "lstrip": false,
@@ -175,7 +166,7 @@
175
  "special": true
176
  },
177
  {
178
- "id": 128015,
179
  "content": "[R-13]",
180
  "single_word": false,
181
  "lstrip": false,
@@ -184,7 +175,7 @@
184
  "special": true
185
  },
186
  {
187
- "id": 128016,
188
  "content": "[R-14]",
189
  "single_word": false,
190
  "lstrip": false,
@@ -193,7 +184,7 @@
193
  "special": true
194
  },
195
  {
196
- "id": 128017,
197
  "content": "[R-15]",
198
  "single_word": false,
199
  "lstrip": false,
@@ -202,7 +193,7 @@
202
  "special": true
203
  },
204
  {
205
- "id": 128018,
206
  "content": "[R-16]",
207
  "single_word": false,
208
  "lstrip": false,
@@ -211,7 +202,7 @@
211
  "special": true
212
  },
213
  {
214
- "id": 128019,
215
  "content": "[R-17]",
216
  "single_word": false,
217
  "lstrip": false,
@@ -220,7 +211,7 @@
220
  "special": true
221
  },
222
  {
223
- "id": 128020,
224
  "content": "[R-18]",
225
  "single_word": false,
226
  "lstrip": false,
@@ -229,7 +220,7 @@
229
  "special": true
230
  },
231
  {
232
- "id": 128021,
233
  "content": "[R-19]",
234
  "single_word": false,
235
  "lstrip": false,
@@ -238,7 +229,7 @@
238
  "special": true
239
  },
240
  {
241
- "id": 128022,
242
  "content": "[R-20]",
243
  "single_word": false,
244
  "lstrip": false,
@@ -247,7 +238,7 @@
247
  "special": true
248
  },
249
  {
250
- "id": 128023,
251
  "content": "[R-21]",
252
  "single_word": false,
253
  "lstrip": false,
@@ -256,7 +247,7 @@
256
  "special": true
257
  },
258
  {
259
- "id": 128024,
260
  "content": "[R-22]",
261
  "single_word": false,
262
  "lstrip": false,
@@ -265,7 +256,7 @@
265
  "special": true
266
  },
267
  {
268
- "id": 128025,
269
  "content": "[R-23]",
270
  "single_word": false,
271
  "lstrip": false,
@@ -274,7 +265,7 @@
274
  "special": true
275
  },
276
  {
277
- "id": 128026,
278
  "content": "[R-24]",
279
  "single_word": false,
280
  "lstrip": false,
@@ -283,7 +274,7 @@
283
  "special": true
284
  },
285
  {
286
- "id": 128027,
287
  "content": "--NME--",
288
  "single_word": false,
289
  "lstrip": false,
@@ -292,7 +283,7 @@
292
  "special": true
293
  },
294
  {
295
- "id": 128028,
296
  "content": "[E-0]",
297
  "single_word": false,
298
  "lstrip": false,
@@ -301,7 +292,7 @@
301
  "special": true
302
  },
303
  {
304
- "id": 128029,
305
  "content": "[E-1]",
306
  "single_word": false,
307
  "lstrip": false,
@@ -310,7 +301,7 @@
310
  "special": true
311
  },
312
  {
313
- "id": 128030,
314
  "content": "[E-2]",
315
  "single_word": false,
316
  "lstrip": false,
@@ -319,7 +310,7 @@
319
  "special": true
320
  },
321
  {
322
- "id": 128031,
323
  "content": "[E-3]",
324
  "single_word": false,
325
  "lstrip": false,
@@ -328,7 +319,7 @@
328
  "special": true
329
  },
330
  {
331
- "id": 128032,
332
  "content": "[E-4]",
333
  "single_word": false,
334
  "lstrip": false,
@@ -337,7 +328,7 @@
337
  "special": true
338
  },
339
  {
340
- "id": 128033,
341
  "content": "[E-5]",
342
  "single_word": false,
343
  "lstrip": false,
@@ -346,7 +337,7 @@
346
  "special": true
347
  },
348
  {
349
- "id": 128034,
350
  "content": "[E-6]",
351
  "single_word": false,
352
  "lstrip": false,
@@ -355,7 +346,7 @@
355
  "special": true
356
  },
357
  {
358
- "id": 128035,
359
  "content": "[E-7]",
360
  "single_word": false,
361
  "lstrip": false,
@@ -364,7 +355,7 @@
364
  "special": true
365
  },
366
  {
367
- "id": 128036,
368
  "content": "[E-8]",
369
  "single_word": false,
370
  "lstrip": false,
@@ -373,7 +364,7 @@
373
  "special": true
374
  },
375
  {
376
- "id": 128037,
377
  "content": "[E-9]",
378
  "single_word": false,
379
  "lstrip": false,
@@ -382,7 +373,7 @@
382
  "special": true
383
  },
384
  {
385
- "id": 128038,
386
  "content": "[E-10]",
387
  "single_word": false,
388
  "lstrip": false,
@@ -391,7 +382,7 @@
391
  "special": true
392
  },
393
  {
394
- "id": 128039,
395
  "content": "[E-11]",
396
  "single_word": false,
397
  "lstrip": false,
 
50
  },
51
  {
52
  "id": 128001,
 
 
 
 
 
 
 
 
 
53
  "content": "[R-0]",
54
  "single_word": false,
55
  "lstrip": false,
 
58
  "special": true
59
  },
60
  {
61
+ "id": 128002,
62
  "content": "[R-1]",
63
  "single_word": false,
64
  "lstrip": false,
 
67
  "special": true
68
  },
69
  {
70
+ "id": 128003,
71
  "content": "[R-2]",
72
  "single_word": false,
73
  "lstrip": false,
 
76
  "special": true
77
  },
78
  {
79
+ "id": 128004,
80
  "content": "[R-3]",
81
  "single_word": false,
82
  "lstrip": false,
 
85
  "special": true
86
  },
87
  {
88
+ "id": 128005,
89
  "content": "[R-4]",
90
  "single_word": false,
91
  "lstrip": false,
 
94
  "special": true
95
  },
96
  {
97
+ "id": 128006,
98
  "content": "[R-5]",
99
  "single_word": false,
100
  "lstrip": false,
 
103
  "special": true
104
  },
105
  {
106
+ "id": 128007,
107
  "content": "[R-6]",
108
  "single_word": false,
109
  "lstrip": false,
 
112
  "special": true
113
  },
114
  {
115
+ "id": 128008,
116
  "content": "[R-7]",
117
  "single_word": false,
118
  "lstrip": false,
 
121
  "special": true
122
  },
123
  {
124
+ "id": 128009,
125
  "content": "[R-8]",
126
  "single_word": false,
127
  "lstrip": false,
 
130
  "special": true
131
  },
132
  {
133
+ "id": 128010,
134
  "content": "[R-9]",
135
  "single_word": false,
136
  "lstrip": false,
 
139
  "special": true
140
  },
141
  {
142
+ "id": 128011,
143
  "content": "[R-10]",
144
  "single_word": false,
145
  "lstrip": false,
 
148
  "special": true
149
  },
150
  {
151
+ "id": 128012,
152
  "content": "[R-11]",
153
  "single_word": false,
154
  "lstrip": false,
 
157
  "special": true
158
  },
159
  {
160
+ "id": 128013,
161
  "content": "[R-12]",
162
  "single_word": false,
163
  "lstrip": false,
 
166
  "special": true
167
  },
168
  {
169
+ "id": 128014,
170
  "content": "[R-13]",
171
  "single_word": false,
172
  "lstrip": false,
 
175
  "special": true
176
  },
177
  {
178
+ "id": 128015,
179
  "content": "[R-14]",
180
  "single_word": false,
181
  "lstrip": false,
 
184
  "special": true
185
  },
186
  {
187
+ "id": 128016,
188
  "content": "[R-15]",
189
  "single_word": false,
190
  "lstrip": false,
 
193
  "special": true
194
  },
195
  {
196
+ "id": 128017,
197
  "content": "[R-16]",
198
  "single_word": false,
199
  "lstrip": false,
 
202
  "special": true
203
  },
204
  {
205
+ "id": 128018,
206
  "content": "[R-17]",
207
  "single_word": false,
208
  "lstrip": false,
 
211
  "special": true
212
  },
213
  {
214
+ "id": 128019,
215
  "content": "[R-18]",
216
  "single_word": false,
217
  "lstrip": false,
 
220
  "special": true
221
  },
222
  {
223
+ "id": 128020,
224
  "content": "[R-19]",
225
  "single_word": false,
226
  "lstrip": false,
 
229
  "special": true
230
  },
231
  {
232
+ "id": 128021,
233
  "content": "[R-20]",
234
  "single_word": false,
235
  "lstrip": false,
 
238
  "special": true
239
  },
240
  {
241
+ "id": 128022,
242
  "content": "[R-21]",
243
  "single_word": false,
244
  "lstrip": false,
 
247
  "special": true
248
  },
249
  {
250
+ "id": 128023,
251
  "content": "[R-22]",
252
  "single_word": false,
253
  "lstrip": false,
 
256
  "special": true
257
  },
258
  {
259
+ "id": 128024,
260
  "content": "[R-23]",
261
  "single_word": false,
262
  "lstrip": false,
 
265
  "special": true
266
  },
267
  {
268
+ "id": 128025,
269
  "content": "[R-24]",
270
  "single_word": false,
271
  "lstrip": false,
 
274
  "special": true
275
  },
276
  {
277
+ "id": 128026,
278
  "content": "--NME--",
279
  "single_word": false,
280
  "lstrip": false,
 
283
  "special": true
284
  },
285
  {
286
+ "id": 128027,
287
  "content": "[E-0]",
288
  "single_word": false,
289
  "lstrip": false,
 
292
  "special": true
293
  },
294
  {
295
+ "id": 128028,
296
  "content": "[E-1]",
297
  "single_word": false,
298
  "lstrip": false,
 
301
  "special": true
302
  },
303
  {
304
+ "id": 128029,
305
  "content": "[E-2]",
306
  "single_word": false,
307
  "lstrip": false,
 
310
  "special": true
311
  },
312
  {
313
+ "id": 128030,
314
  "content": "[E-3]",
315
  "single_word": false,
316
  "lstrip": false,
 
319
  "special": true
320
  },
321
  {
322
+ "id": 128031,
323
  "content": "[E-4]",
324
  "single_word": false,
325
  "lstrip": false,
 
328
  "special": true
329
  },
330
  {
331
+ "id": 128032,
332
  "content": "[E-5]",
333
  "single_word": false,
334
  "lstrip": false,
 
337
  "special": true
338
  },
339
  {
340
+ "id": 128033,
341
  "content": "[E-6]",
342
  "single_word": false,
343
  "lstrip": false,
 
346
  "special": true
347
  },
348
  {
349
+ "id": 128034,
350
  "content": "[E-7]",
351
  "single_word": false,
352
  "lstrip": false,
 
355
  "special": true
356
  },
357
  {
358
+ "id": 128035,
359
  "content": "[E-8]",
360
  "single_word": false,
361
  "lstrip": false,
 
364
  "special": true
365
  },
366
  {
367
+ "id": 128036,
368
  "content": "[E-9]",
369
  "single_word": false,
370
  "lstrip": false,
 
373
  "special": true
374
  },
375
  {
376
+ "id": 128037,
377
  "content": "[E-10]",
378
  "single_word": false,
379
  "lstrip": false,
 
382
  "special": true
383
  },
384
  {
385
+ "id": 128038,
386
  "content": "[E-11]",
387
  "single_word": false,
388
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -42,14 +42,6 @@
42
  "special": true
43
  },
44
  "128001": {
45
- "content": "--COREF--",
46
- "lstrip": false,
47
- "normalized": false,
48
- "rstrip": false,
49
- "single_word": false,
50
- "special": true
51
- },
52
- "128002": {
53
  "content": "[R-0]",
54
  "lstrip": false,
55
  "normalized": false,
@@ -57,7 +49,7 @@
57
  "single_word": false,
58
  "special": true
59
  },
60
- "128003": {
61
  "content": "[R-1]",
62
  "lstrip": false,
63
  "normalized": false,
@@ -65,7 +57,7 @@
65
  "single_word": false,
66
  "special": true
67
  },
68
- "128004": {
69
  "content": "[R-2]",
70
  "lstrip": false,
71
  "normalized": false,
@@ -73,7 +65,7 @@
73
  "single_word": false,
74
  "special": true
75
  },
76
- "128005": {
77
  "content": "[R-3]",
78
  "lstrip": false,
79
  "normalized": false,
@@ -81,7 +73,7 @@
81
  "single_word": false,
82
  "special": true
83
  },
84
- "128006": {
85
  "content": "[R-4]",
86
  "lstrip": false,
87
  "normalized": false,
@@ -89,7 +81,7 @@
89
  "single_word": false,
90
  "special": true
91
  },
92
- "128007": {
93
  "content": "[R-5]",
94
  "lstrip": false,
95
  "normalized": false,
@@ -97,7 +89,7 @@
97
  "single_word": false,
98
  "special": true
99
  },
100
- "128008": {
101
  "content": "[R-6]",
102
  "lstrip": false,
103
  "normalized": false,
@@ -105,7 +97,7 @@
105
  "single_word": false,
106
  "special": true
107
  },
108
- "128009": {
109
  "content": "[R-7]",
110
  "lstrip": false,
111
  "normalized": false,
@@ -113,7 +105,7 @@
113
  "single_word": false,
114
  "special": true
115
  },
116
- "128010": {
117
  "content": "[R-8]",
118
  "lstrip": false,
119
  "normalized": false,
@@ -121,7 +113,7 @@
121
  "single_word": false,
122
  "special": true
123
  },
124
- "128011": {
125
  "content": "[R-9]",
126
  "lstrip": false,
127
  "normalized": false,
@@ -129,7 +121,7 @@
129
  "single_word": false,
130
  "special": true
131
  },
132
- "128012": {
133
  "content": "[R-10]",
134
  "lstrip": false,
135
  "normalized": false,
@@ -137,7 +129,7 @@
137
  "single_word": false,
138
  "special": true
139
  },
140
- "128013": {
141
  "content": "[R-11]",
142
  "lstrip": false,
143
  "normalized": false,
@@ -145,7 +137,7 @@
145
  "single_word": false,
146
  "special": true
147
  },
148
- "128014": {
149
  "content": "[R-12]",
150
  "lstrip": false,
151
  "normalized": false,
@@ -153,7 +145,7 @@
153
  "single_word": false,
154
  "special": true
155
  },
156
- "128015": {
157
  "content": "[R-13]",
158
  "lstrip": false,
159
  "normalized": false,
@@ -161,7 +153,7 @@
161
  "single_word": false,
162
  "special": true
163
  },
164
- "128016": {
165
  "content": "[R-14]",
166
  "lstrip": false,
167
  "normalized": false,
@@ -169,7 +161,7 @@
169
  "single_word": false,
170
  "special": true
171
  },
172
- "128017": {
173
  "content": "[R-15]",
174
  "lstrip": false,
175
  "normalized": false,
@@ -177,7 +169,7 @@
177
  "single_word": false,
178
  "special": true
179
  },
180
- "128018": {
181
  "content": "[R-16]",
182
  "lstrip": false,
183
  "normalized": false,
@@ -185,7 +177,7 @@
185
  "single_word": false,
186
  "special": true
187
  },
188
- "128019": {
189
  "content": "[R-17]",
190
  "lstrip": false,
191
  "normalized": false,
@@ -193,7 +185,7 @@
193
  "single_word": false,
194
  "special": true
195
  },
196
- "128020": {
197
  "content": "[R-18]",
198
  "lstrip": false,
199
  "normalized": false,
@@ -201,7 +193,7 @@
201
  "single_word": false,
202
  "special": true
203
  },
204
- "128021": {
205
  "content": "[R-19]",
206
  "lstrip": false,
207
  "normalized": false,
@@ -209,7 +201,7 @@
209
  "single_word": false,
210
  "special": true
211
  },
212
- "128022": {
213
  "content": "[R-20]",
214
  "lstrip": false,
215
  "normalized": false,
@@ -217,7 +209,7 @@
217
  "single_word": false,
218
  "special": true
219
  },
220
- "128023": {
221
  "content": "[R-21]",
222
  "lstrip": false,
223
  "normalized": false,
@@ -225,7 +217,7 @@
225
  "single_word": false,
226
  "special": true
227
  },
228
- "128024": {
229
  "content": "[R-22]",
230
  "lstrip": false,
231
  "normalized": false,
@@ -233,7 +225,7 @@
233
  "single_word": false,
234
  "special": true
235
  },
236
- "128025": {
237
  "content": "[R-23]",
238
  "lstrip": false,
239
  "normalized": false,
@@ -241,7 +233,7 @@
241
  "single_word": false,
242
  "special": true
243
  },
244
- "128026": {
245
  "content": "[R-24]",
246
  "lstrip": false,
247
  "normalized": false,
@@ -249,7 +241,7 @@
249
  "single_word": false,
250
  "special": true
251
  },
252
- "128027": {
253
  "content": "--NME--",
254
  "lstrip": false,
255
  "normalized": false,
@@ -257,7 +249,7 @@
257
  "single_word": false,
258
  "special": true
259
  },
260
- "128028": {
261
  "content": "[E-0]",
262
  "lstrip": false,
263
  "normalized": false,
@@ -265,7 +257,7 @@
265
  "single_word": false,
266
  "special": true
267
  },
268
- "128029": {
269
  "content": "[E-1]",
270
  "lstrip": false,
271
  "normalized": false,
@@ -273,7 +265,7 @@
273
  "single_word": false,
274
  "special": true
275
  },
276
- "128030": {
277
  "content": "[E-2]",
278
  "lstrip": false,
279
  "normalized": false,
@@ -281,7 +273,7 @@
281
  "single_word": false,
282
  "special": true
283
  },
284
- "128031": {
285
  "content": "[E-3]",
286
  "lstrip": false,
287
  "normalized": false,
@@ -289,7 +281,7 @@
289
  "single_word": false,
290
  "special": true
291
  },
292
- "128032": {
293
  "content": "[E-4]",
294
  "lstrip": false,
295
  "normalized": false,
@@ -297,7 +289,7 @@
297
  "single_word": false,
298
  "special": true
299
  },
300
- "128033": {
301
  "content": "[E-5]",
302
  "lstrip": false,
303
  "normalized": false,
@@ -305,7 +297,7 @@
305
  "single_word": false,
306
  "special": true
307
  },
308
- "128034": {
309
  "content": "[E-6]",
310
  "lstrip": false,
311
  "normalized": false,
@@ -313,7 +305,7 @@
313
  "single_word": false,
314
  "special": true
315
  },
316
- "128035": {
317
  "content": "[E-7]",
318
  "lstrip": false,
319
  "normalized": false,
@@ -321,7 +313,7 @@
321
  "single_word": false,
322
  "special": true
323
  },
324
- "128036": {
325
  "content": "[E-8]",
326
  "lstrip": false,
327
  "normalized": false,
@@ -329,7 +321,7 @@
329
  "single_word": false,
330
  "special": true
331
  },
332
- "128037": {
333
  "content": "[E-9]",
334
  "lstrip": false,
335
  "normalized": false,
@@ -337,7 +329,7 @@
337
  "single_word": false,
338
  "special": true
339
  },
340
- "128038": {
341
  "content": "[E-10]",
342
  "lstrip": false,
343
  "normalized": false,
@@ -345,7 +337,7 @@
345
  "single_word": false,
346
  "special": true
347
  },
348
- "128039": {
349
  "content": "[E-11]",
350
  "lstrip": false,
351
  "normalized": false,
@@ -355,7 +347,6 @@
355
  }
356
  },
357
  "additional_special_tokens": [
358
- "--COREF--",
359
  "[R-0]",
360
  "[R-1]",
361
  "[R-2]",
 
42
  "special": true
43
  },
44
  "128001": {
 
 
 
 
 
 
 
 
45
  "content": "[R-0]",
46
  "lstrip": false,
47
  "normalized": false,
 
49
  "single_word": false,
50
  "special": true
51
  },
52
+ "128002": {
53
  "content": "[R-1]",
54
  "lstrip": false,
55
  "normalized": false,
 
57
  "single_word": false,
58
  "special": true
59
  },
60
+ "128003": {
61
  "content": "[R-2]",
62
  "lstrip": false,
63
  "normalized": false,
 
65
  "single_word": false,
66
  "special": true
67
  },
68
+ "128004": {
69
  "content": "[R-3]",
70
  "lstrip": false,
71
  "normalized": false,
 
73
  "single_word": false,
74
  "special": true
75
  },
76
+ "128005": {
77
  "content": "[R-4]",
78
  "lstrip": false,
79
  "normalized": false,
 
81
  "single_word": false,
82
  "special": true
83
  },
84
+ "128006": {
85
  "content": "[R-5]",
86
  "lstrip": false,
87
  "normalized": false,
 
89
  "single_word": false,
90
  "special": true
91
  },
92
+ "128007": {
93
  "content": "[R-6]",
94
  "lstrip": false,
95
  "normalized": false,
 
97
  "single_word": false,
98
  "special": true
99
  },
100
+ "128008": {
101
  "content": "[R-7]",
102
  "lstrip": false,
103
  "normalized": false,
 
105
  "single_word": false,
106
  "special": true
107
  },
108
+ "128009": {
109
  "content": "[R-8]",
110
  "lstrip": false,
111
  "normalized": false,
 
113
  "single_word": false,
114
  "special": true
115
  },
116
+ "128010": {
117
  "content": "[R-9]",
118
  "lstrip": false,
119
  "normalized": false,
 
121
  "single_word": false,
122
  "special": true
123
  },
124
+ "128011": {
125
  "content": "[R-10]",
126
  "lstrip": false,
127
  "normalized": false,
 
129
  "single_word": false,
130
  "special": true
131
  },
132
+ "128012": {
133
  "content": "[R-11]",
134
  "lstrip": false,
135
  "normalized": false,
 
137
  "single_word": false,
138
  "special": true
139
  },
140
+ "128013": {
141
  "content": "[R-12]",
142
  "lstrip": false,
143
  "normalized": false,
 
145
  "single_word": false,
146
  "special": true
147
  },
148
+ "128014": {
149
  "content": "[R-13]",
150
  "lstrip": false,
151
  "normalized": false,
 
153
  "single_word": false,
154
  "special": true
155
  },
156
+ "128015": {
157
  "content": "[R-14]",
158
  "lstrip": false,
159
  "normalized": false,
 
161
  "single_word": false,
162
  "special": true
163
  },
164
+ "128016": {
165
  "content": "[R-15]",
166
  "lstrip": false,
167
  "normalized": false,
 
169
  "single_word": false,
170
  "special": true
171
  },
172
+ "128017": {
173
  "content": "[R-16]",
174
  "lstrip": false,
175
  "normalized": false,
 
177
  "single_word": false,
178
  "special": true
179
  },
180
+ "128018": {
181
  "content": "[R-17]",
182
  "lstrip": false,
183
  "normalized": false,
 
185
  "single_word": false,
186
  "special": true
187
  },
188
+ "128019": {
189
  "content": "[R-18]",
190
  "lstrip": false,
191
  "normalized": false,
 
193
  "single_word": false,
194
  "special": true
195
  },
196
+ "128020": {
197
  "content": "[R-19]",
198
  "lstrip": false,
199
  "normalized": false,
 
201
  "single_word": false,
202
  "special": true
203
  },
204
+ "128021": {
205
  "content": "[R-20]",
206
  "lstrip": false,
207
  "normalized": false,
 
209
  "single_word": false,
210
  "special": true
211
  },
212
+ "128022": {
213
  "content": "[R-21]",
214
  "lstrip": false,
215
  "normalized": false,
 
217
  "single_word": false,
218
  "special": true
219
  },
220
+ "128023": {
221
  "content": "[R-22]",
222
  "lstrip": false,
223
  "normalized": false,
 
225
  "single_word": false,
226
  "special": true
227
  },
228
+ "128024": {
229
  "content": "[R-23]",
230
  "lstrip": false,
231
  "normalized": false,
 
233
  "single_word": false,
234
  "special": true
235
  },
236
+ "128025": {
237
  "content": "[R-24]",
238
  "lstrip": false,
239
  "normalized": false,
 
241
  "single_word": false,
242
  "special": true
243
  },
244
+ "128026": {
245
  "content": "--NME--",
246
  "lstrip": false,
247
  "normalized": false,
 
249
  "single_word": false,
250
  "special": true
251
  },
252
+ "128027": {
253
  "content": "[E-0]",
254
  "lstrip": false,
255
  "normalized": false,
 
257
  "single_word": false,
258
  "special": true
259
  },
260
+ "128028": {
261
  "content": "[E-1]",
262
  "lstrip": false,
263
  "normalized": false,
 
265
  "single_word": false,
266
  "special": true
267
  },
268
+ "128029": {
269
  "content": "[E-2]",
270
  "lstrip": false,
271
  "normalized": false,
 
273
  "single_word": false,
274
  "special": true
275
  },
276
+ "128030": {
277
  "content": "[E-3]",
278
  "lstrip": false,
279
  "normalized": false,
 
281
  "single_word": false,
282
  "special": true
283
  },
284
+ "128031": {
285
  "content": "[E-4]",
286
  "lstrip": false,
287
  "normalized": false,
 
289
  "single_word": false,
290
  "special": true
291
  },
292
+ "128032": {
293
  "content": "[E-5]",
294
  "lstrip": false,
295
  "normalized": false,
 
297
  "single_word": false,
298
  "special": true
299
  },
300
+ "128033": {
301
  "content": "[E-6]",
302
  "lstrip": false,
303
  "normalized": false,
 
305
  "single_word": false,
306
  "special": true
307
  },
308
+ "128034": {
309
  "content": "[E-7]",
310
  "lstrip": false,
311
  "normalized": false,
 
313
  "single_word": false,
314
  "special": true
315
  },
316
+ "128035": {
317
  "content": "[E-8]",
318
  "lstrip": false,
319
  "normalized": false,
 
321
  "single_word": false,
322
  "special": true
323
  },
324
+ "128036": {
325
  "content": "[E-9]",
326
  "lstrip": false,
327
  "normalized": false,
 
329
  "single_word": false,
330
  "special": true
331
  },
332
+ "128037": {
333
  "content": "[E-10]",
334
  "lstrip": false,
335
  "normalized": false,
 
337
  "single_word": false,
338
  "special": true
339
  },
340
+ "128038": {
341
  "content": "[E-11]",
342
  "lstrip": false,
343
  "normalized": false,
 
347
  }
348
  },
349
  "additional_special_tokens": [
 
350
  "[R-0]",
351
  "[R-1]",
352
  "[R-2]",