vldsavelyev commited on
Commit
78d9cac
1 Parent(s): e41a565

Upload tokenizer

Browse files
Files changed (5) hide show
  1. merges.txt +243 -0
  2. special_tokens_map.json +9 -0
  3. tokenizer.json +799 -0
  4. tokenizer_config.json +12 -0
  5. vocab.json +1 -0
merges.txt ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #version: 0.2
2
+ Ġ (
3
+ Ġ 0
4
+ Ġ 3
5
+ Ġ |
6
+ Ġ 1
7
+ Ġ r
8
+ Ġ 2
9
+ 1 6
10
+ Ġ 5
11
+ Ġ 4
12
+ Ġ 7
13
+ ) .
14
+ t u
15
+ Ġ :
16
+ Ġ 6
17
+ Ġ 8
18
+ } .
19
+ Ġ 9
20
+ Ġ1 2
21
+ p m
22
+ Ġ1 0
23
+ l r
24
+ } )
25
+ 3 2
26
+ Ġ3 6
27
+ Ġ1 4
28
+ } ).
29
+ Ġ1 5
30
+ Ġ1 1
31
+ Ġ4 2
32
+ 1 0
33
+ Ġ1 3
34
+ Ġ1 7
35
+ Ġ3 5
36
+ Ġ tu
37
+ Ġ3 8
38
+ 1 2
39
+ Ġ4 0
40
+ b e
41
+ s t
42
+ Ġ1 6
43
+ Ġ4 6
44
+ 4 2
45
+ 4 6
46
+ Ġ1 9
47
+ i n
48
+ 3 6
49
+ 1 1
50
+ Ġ E
51
+ 6 4
52
+ 1 4
53
+ Ġ2 4
54
+ Ġ1 8
55
+ ) }
56
+ Ġ2 0
57
+ l e
58
+ i t
59
+ Ġ D
60
+ Ġ "
61
+ r e
62
+ t it
63
+ tit le
64
+ e n
65
+ in g
66
+ r a
67
+ t e
68
+ c k
69
+ Ġ A
70
+ u m
71
+ en t
72
+ m p
73
+ t s
74
+ n ing
75
+ ra ck
76
+ st r
77
+ t rack
78
+ f re
79
+ um ent
80
+ mp o
81
+ str ument
82
+ in strument
83
+ tu ning
84
+ te mpo
85
+ fre ts
86
+ Ġ G
87
+ 1 5
88
+ 4 9
89
+ 3 8
90
+ ) }.
91
+ 1 3
92
+ a h
93
+ Ġ B
94
+ Ġ2 2
95
+ 3 5
96
+ Ġ2 1
97
+ Ġ4 4
98
+ 4 4
99
+ 1 7
100
+ 5 1
101
+ Ġ4 9
102
+ Ġ5 1
103
+ 4 0
104
+ Ġ lr
105
+ 5 7
106
+ Ġ4 1
107
+ Ġ v
108
+ Ġ be
109
+ Ġ5 4
110
+ Ġ4 5
111
+ Ġ5 7
112
+ Ġ4 3
113
+ Ġ g
114
+ n h
115
+ 5 9
116
+ Ġ C
117
+ Ġ5 5
118
+ Ġ5 9
119
+ 1 9
120
+ Ġ3 7
121
+ Ġ4 7
122
+ Ġ5 3
123
+ Ġ3 0
124
+ 5 3
125
+ 4 1
126
+ Ġ8 5
127
+ Ġ6 9
128
+ 5 5
129
+ ) })
130
+ 4 5
131
+ 1 8
132
+ Ġ4 8
133
+ Ġ2 3
134
+ Ġ8 2
135
+ 2 0
136
+ Ġ7 0
137
+ h e
138
+ Ġ2 5
139
+ 4 3
140
+ Ġ2 9
141
+ s h
142
+ ĠE b
143
+ Ġ pm
144
+ Ġ3 3
145
+ 2 2
146
+ o n
147
+ 3 9
148
+ Ġ6 4
149
+ Ġ2 7
150
+ Ġ F
151
+ ĠD b
152
+ Ġ T
153
+ Ġ12 0
154
+ ) }).
155
+ 5 4
156
+ ĠG b
157
+ a n
158
+ ĠA b
159
+ Ġ5 0
160
+ e r
161
+ Ġ3 9
162
+ Ġ2 8
163
+ Ġ3 1
164
+ 3 3
165
+ Ġ S
166
+ Ġ7 3
167
+ Ġ6 3
168
+ a r
169
+ 6 9
170
+ Ġ2 6
171
+ o u
172
+ 2 1
173
+ s u
174
+ Ġ6 1
175
+ ĠB b
176
+ p h
177
+ Ġ h
178
+ 2 4
179
+ b title
180
+ su btitle
181
+ o r
182
+ Ġ M
183
+ Ġ f
184
+ Ġ ah
185
+ 4 7
186
+ Ġ3 2
187
+ Ġ st
188
+ 4 8
189
+ Ġ t
190
+ Ġ Ã
191
+ Ġ O
192
+ 3 7
193
+ a l
194
+ Ġ nh
195
+ Ġ3 4
196
+ 5 2
197
+ m e
198
+ Ġ5 2
199
+ Ġ L
200
+ a t
201
+ Ã ¥
202
+ ĠT he
203
+ Ã ®
204
+ Ã ł
205
+ Ġ W
206
+ T he
207
+ e s
208
+ Ġ I
209
+ l l
210
+ v e
211
+ Ġ8 0
212
+ Ġ6 2
213
+ 2 3
214
+ 8 2
215
+ Ġ H
216
+ 7 0
217
+ Ġ10 0
218
+ Ġ R
219
+ Ã¥ Ã
220
+ î Ã
221
+ t h
222
+ ) "
223
+ i g
224
+ a d
225
+ Ġ" "
226
+ Ġ5 6
227
+ i c
228
+ Ġt he
229
+ o w
230
+ o m
231
+ Ġ8 7
232
+ i s
233
+ Ġ o
234
+ Ġ7 4
235
+ e d
236
+ 2 8
237
+ a y
238
+ Ãł Ã
239
+ Ġ P
240
+ Ġo f
241
+ 2 7
242
+ ĠO f
243
+ u n
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|pad|>"
4
+ ],
5
+ "bos_token": "<|endoftext|>",
6
+ "eos_token": "<|endoftext|>",
7
+ "pad_token": "<|pad|>",
8
+ "unk_token": "<|endoftext|>"
9
+ }
tokenizer.json ADDED
@@ -0,0 +1,799 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<|endoftext|>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<|pad|>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ }
24
+ ],
25
+ "normalizer": null,
26
+ "pre_tokenizer": {
27
+ "type": "ByteLevel",
28
+ "add_prefix_space": false,
29
+ "trim_offsets": true,
30
+ "use_regex": true
31
+ },
32
+ "post_processor": {
33
+ "type": "ByteLevel",
34
+ "add_prefix_space": true,
35
+ "trim_offsets": false,
36
+ "use_regex": true
37
+ },
38
+ "decoder": {
39
+ "type": "ByteLevel",
40
+ "add_prefix_space": true,
41
+ "trim_offsets": true,
42
+ "use_regex": true
43
+ },
44
+ "model": {
45
+ "type": "BPE",
46
+ "dropout": null,
47
+ "unk_token": null,
48
+ "continuing_subword_prefix": "",
49
+ "end_of_word_suffix": "",
50
+ "fuse_unk": false,
51
+ "byte_fallback": false,
52
+ "vocab": {
53
+ "<|endoftext|>": 0,
54
+ "<|pad|>": 1,
55
+ "!": 2,
56
+ "\"": 3,
57
+ "#": 4,
58
+ "$": 5,
59
+ "%": 6,
60
+ "&": 7,
61
+ "'": 8,
62
+ "(": 9,
63
+ ")": 10,
64
+ "*": 11,
65
+ "+": 12,
66
+ ",": 13,
67
+ "-": 14,
68
+ ".": 15,
69
+ "/": 16,
70
+ "0": 17,
71
+ "1": 18,
72
+ "2": 19,
73
+ "3": 20,
74
+ "4": 21,
75
+ "5": 22,
76
+ "6": 23,
77
+ "7": 24,
78
+ "8": 25,
79
+ "9": 26,
80
+ ":": 27,
81
+ ";": 28,
82
+ "<": 29,
83
+ "=": 30,
84
+ ">": 31,
85
+ "?": 32,
86
+ "@": 33,
87
+ "A": 34,
88
+ "B": 35,
89
+ "C": 36,
90
+ "D": 37,
91
+ "E": 38,
92
+ "F": 39,
93
+ "G": 40,
94
+ "H": 41,
95
+ "I": 42,
96
+ "J": 43,
97
+ "K": 44,
98
+ "L": 45,
99
+ "M": 46,
100
+ "N": 47,
101
+ "O": 48,
102
+ "P": 49,
103
+ "Q": 50,
104
+ "R": 51,
105
+ "S": 52,
106
+ "T": 53,
107
+ "U": 54,
108
+ "V": 55,
109
+ "W": 56,
110
+ "X": 57,
111
+ "Y": 58,
112
+ "Z": 59,
113
+ "[": 60,
114
+ "\\": 61,
115
+ "]": 62,
116
+ "^": 63,
117
+ "_": 64,
118
+ "`": 65,
119
+ "a": 66,
120
+ "b": 67,
121
+ "c": 68,
122
+ "d": 69,
123
+ "e": 70,
124
+ "f": 71,
125
+ "g": 72,
126
+ "h": 73,
127
+ "i": 74,
128
+ "j": 75,
129
+ "k": 76,
130
+ "l": 77,
131
+ "m": 78,
132
+ "n": 79,
133
+ "o": 80,
134
+ "p": 81,
135
+ "q": 82,
136
+ "r": 83,
137
+ "s": 84,
138
+ "t": 85,
139
+ "u": 86,
140
+ "v": 87,
141
+ "w": 88,
142
+ "x": 89,
143
+ "y": 90,
144
+ "z": 91,
145
+ "{": 92,
146
+ "|": 93,
147
+ "}": 94,
148
+ "~": 95,
149
+ "¡": 96,
150
+ "¢": 97,
151
+ "£": 98,
152
+ "¤": 99,
153
+ "¥": 100,
154
+ "¦": 101,
155
+ "§": 102,
156
+ "¨": 103,
157
+ "©": 104,
158
+ "ª": 105,
159
+ "«": 106,
160
+ "¬": 107,
161
+ "®": 108,
162
+ "¯": 109,
163
+ "°": 110,
164
+ "±": 111,
165
+ "²": 112,
166
+ "³": 113,
167
+ "´": 114,
168
+ "µ": 115,
169
+ "¶": 116,
170
+ "·": 117,
171
+ "¸": 118,
172
+ "¹": 119,
173
+ "º": 120,
174
+ "»": 121,
175
+ "¼": 122,
176
+ "½": 123,
177
+ "¾": 124,
178
+ "¿": 125,
179
+ "À": 126,
180
+ "Á": 127,
181
+ "Â": 128,
182
+ "Ã": 129,
183
+ "Ä": 130,
184
+ "Å": 131,
185
+ "Æ": 132,
186
+ "Ç": 133,
187
+ "È": 134,
188
+ "É": 135,
189
+ "Ê": 136,
190
+ "Ë": 137,
191
+ "Ì": 138,
192
+ "Í": 139,
193
+ "Î": 140,
194
+ "Ï": 141,
195
+ "Ð": 142,
196
+ "Ñ": 143,
197
+ "Ò": 144,
198
+ "Ó": 145,
199
+ "Ô": 146,
200
+ "Õ": 147,
201
+ "Ö": 148,
202
+ "×": 149,
203
+ "Ø": 150,
204
+ "Ù": 151,
205
+ "Ú": 152,
206
+ "Û": 153,
207
+ "Ü": 154,
208
+ "Ý": 155,
209
+ "Þ": 156,
210
+ "ß": 157,
211
+ "à": 158,
212
+ "á": 159,
213
+ "â": 160,
214
+ "ã": 161,
215
+ "ä": 162,
216
+ "å": 163,
217
+ "æ": 164,
218
+ "ç": 165,
219
+ "è": 166,
220
+ "é": 167,
221
+ "ê": 168,
222
+ "ë": 169,
223
+ "ì": 170,
224
+ "í": 171,
225
+ "î": 172,
226
+ "ï": 173,
227
+ "ð": 174,
228
+ "ñ": 175,
229
+ "ò": 176,
230
+ "ó": 177,
231
+ "ô": 178,
232
+ "õ": 179,
233
+ "ö": 180,
234
+ "÷": 181,
235
+ "ø": 182,
236
+ "ù": 183,
237
+ "ú": 184,
238
+ "û": 185,
239
+ "ü": 186,
240
+ "ý": 187,
241
+ "þ": 188,
242
+ "ÿ": 189,
243
+ "Ā": 190,
244
+ "ā": 191,
245
+ "Ă": 192,
246
+ "ă": 193,
247
+ "Ą": 194,
248
+ "ą": 195,
249
+ "Ć": 196,
250
+ "ć": 197,
251
+ "Ĉ": 198,
252
+ "ĉ": 199,
253
+ "Ċ": 200,
254
+ "ċ": 201,
255
+ "Č": 202,
256
+ "č": 203,
257
+ "Ď": 204,
258
+ "ď": 205,
259
+ "Đ": 206,
260
+ "đ": 207,
261
+ "Ē": 208,
262
+ "ē": 209,
263
+ "Ĕ": 210,
264
+ "ĕ": 211,
265
+ "Ė": 212,
266
+ "ė": 213,
267
+ "Ę": 214,
268
+ "ę": 215,
269
+ "Ě": 216,
270
+ "ě": 217,
271
+ "Ĝ": 218,
272
+ "ĝ": 219,
273
+ "Ğ": 220,
274
+ "ğ": 221,
275
+ "Ġ": 222,
276
+ "ġ": 223,
277
+ "Ģ": 224,
278
+ "ģ": 225,
279
+ "Ĥ": 226,
280
+ "ĥ": 227,
281
+ "Ħ": 228,
282
+ "ħ": 229,
283
+ "Ĩ": 230,
284
+ "ĩ": 231,
285
+ "Ī": 232,
286
+ "ī": 233,
287
+ "Ĭ": 234,
288
+ "ĭ": 235,
289
+ "Į": 236,
290
+ "į": 237,
291
+ "İ": 238,
292
+ "ı": 239,
293
+ "IJ": 240,
294
+ "ij": 241,
295
+ "Ĵ": 242,
296
+ "ĵ": 243,
297
+ "Ķ": 244,
298
+ "ķ": 245,
299
+ "ĸ": 246,
300
+ "Ĺ": 247,
301
+ "ĺ": 248,
302
+ "Ļ": 249,
303
+ "ļ": 250,
304
+ "Ľ": 251,
305
+ "ľ": 252,
306
+ "Ŀ": 253,
307
+ "ŀ": 254,
308
+ "Ł": 255,
309
+ "ł": 256,
310
+ "Ń": 257,
311
+ "Ġ(": 258,
312
+ "Ġ0": 259,
313
+ "Ġ3": 260,
314
+ "Ġ|": 261,
315
+ "Ġ1": 262,
316
+ "Ġr": 263,
317
+ "Ġ2": 264,
318
+ "16": 265,
319
+ "Ġ5": 266,
320
+ "Ġ4": 267,
321
+ "Ġ7": 268,
322
+ ").": 269,
323
+ "tu": 270,
324
+ "Ġ:": 271,
325
+ "Ġ6": 272,
326
+ "Ġ8": 273,
327
+ "}.": 274,
328
+ "Ġ9": 275,
329
+ "Ġ12": 276,
330
+ "pm": 277,
331
+ "Ġ10": 278,
332
+ "lr": 279,
333
+ "})": 280,
334
+ "32": 281,
335
+ "Ġ36": 282,
336
+ "Ġ14": 283,
337
+ "}).": 284,
338
+ "Ġ15": 285,
339
+ "Ġ11": 286,
340
+ "Ġ42": 287,
341
+ "10": 288,
342
+ "Ġ13": 289,
343
+ "Ġ17": 290,
344
+ "Ġ35": 291,
345
+ "Ġtu": 292,
346
+ "Ġ38": 293,
347
+ "12": 294,
348
+ "Ġ40": 295,
349
+ "be": 296,
350
+ "st": 297,
351
+ "Ġ16": 298,
352
+ "Ġ46": 299,
353
+ "42": 300,
354
+ "46": 301,
355
+ "Ġ19": 302,
356
+ "in": 303,
357
+ "36": 304,
358
+ "11": 305,
359
+ "ĠE": 306,
360
+ "64": 307,
361
+ "14": 308,
362
+ "Ġ24": 309,
363
+ "Ġ18": 310,
364
+ ")}": 311,
365
+ "Ġ20": 312,
366
+ "le": 313,
367
+ "it": 314,
368
+ "ĠD": 315,
369
+ "Ġ\"": 316,
370
+ "re": 317,
371
+ "tit": 318,
372
+ "title": 319,
373
+ "en": 320,
374
+ "ing": 321,
375
+ "ra": 322,
376
+ "te": 323,
377
+ "ck": 324,
378
+ "ĠA": 325,
379
+ "um": 326,
380
+ "ent": 327,
381
+ "mp": 328,
382
+ "ts": 329,
383
+ "ning": 330,
384
+ "rack": 331,
385
+ "str": 332,
386
+ "track": 333,
387
+ "fre": 334,
388
+ "ument": 335,
389
+ "mpo": 336,
390
+ "strument": 337,
391
+ "instrument": 338,
392
+ "tuning": 339,
393
+ "tempo": 340,
394
+ "frets": 341,
395
+ "ĠG": 342,
396
+ "15": 343,
397
+ "49": 344,
398
+ "38": 345,
399
+ ")}.": 346,
400
+ "13": 347,
401
+ "ah": 348,
402
+ "ĠB": 349,
403
+ "Ġ22": 350,
404
+ "35": 351,
405
+ "Ġ21": 352,
406
+ "Ġ44": 353,
407
+ "44": 354,
408
+ "17": 355,
409
+ "51": 356,
410
+ "Ġ49": 357,
411
+ "Ġ51": 358,
412
+ "40": 359,
413
+ "Ġlr": 360,
414
+ "57": 361,
415
+ "Ġ41": 362,
416
+ "Ġv": 363,
417
+ "Ġbe": 364,
418
+ "Ġ54": 365,
419
+ "Ġ45": 366,
420
+ "Ġ57": 367,
421
+ "Ġ43": 368,
422
+ "Ġg": 369,
423
+ "nh": 370,
424
+ "59": 371,
425
+ "ĠC": 372,
426
+ "Ġ55": 373,
427
+ "Ġ59": 374,
428
+ "19": 375,
429
+ "Ġ37": 376,
430
+ "Ġ47": 377,
431
+ "Ġ53": 378,
432
+ "Ġ30": 379,
433
+ "53": 380,
434
+ "41": 381,
435
+ "Ġ85": 382,
436
+ "Ġ69": 383,
437
+ "55": 384,
438
+ ")})": 385,
439
+ "45": 386,
440
+ "18": 387,
441
+ "Ġ48": 388,
442
+ "Ġ23": 389,
443
+ "Ġ82": 390,
444
+ "20": 391,
445
+ "Ġ70": 392,
446
+ "he": 393,
447
+ "Ġ25": 394,
448
+ "43": 395,
449
+ "Ġ29": 396,
450
+ "sh": 397,
451
+ "ĠEb": 398,
452
+ "Ġpm": 399,
453
+ "Ġ33": 400,
454
+ "22": 401,
455
+ "on": 402,
456
+ "39": 403,
457
+ "Ġ64": 404,
458
+ "Ġ27": 405,
459
+ "ĠF": 406,
460
+ "ĠDb": 407,
461
+ "ĠT": 408,
462
+ "Ġ120": 409,
463
+ ")}).": 410,
464
+ "54": 411,
465
+ "ĠGb": 412,
466
+ "an": 413,
467
+ "ĠAb": 414,
468
+ "Ġ50": 415,
469
+ "er": 416,
470
+ "Ġ39": 417,
471
+ "Ġ28": 418,
472
+ "Ġ31": 419,
473
+ "33": 420,
474
+ "ĠS": 421,
475
+ "Ġ73": 422,
476
+ "Ġ63": 423,
477
+ "ar": 424,
478
+ "69": 425,
479
+ "Ġ26": 426,
480
+ "ou": 427,
481
+ "21": 428,
482
+ "su": 429,
483
+ "Ġ61": 430,
484
+ "ĠBb": 431,
485
+ "ph": 432,
486
+ "Ġh": 433,
487
+ "24": 434,
488
+ "btitle": 435,
489
+ "subtitle": 436,
490
+ "or": 437,
491
+ "ĠM": 438,
492
+ "Ġf": 439,
493
+ "Ġah": 440,
494
+ "47": 441,
495
+ "Ġ32": 442,
496
+ "Ġst": 443,
497
+ "48": 444,
498
+ "Ġt": 445,
499
+ "ĠÃ": 446,
500
+ "ĠO": 447,
501
+ "37": 448,
502
+ "al": 449,
503
+ "Ġnh": 450,
504
+ "Ġ34": 451,
505
+ "52": 452,
506
+ "me": 453,
507
+ "Ġ52": 454,
508
+ "ĠL": 455,
509
+ "at": 456,
510
+ "Ã¥": 457,
511
+ "ĠThe": 458,
512
+ "î": 459,
513
+ "Ãł": 460,
514
+ "ĠW": 461,
515
+ "The": 462,
516
+ "es": 463,
517
+ "ĠI": 464,
518
+ "ll": 465,
519
+ "ve": 466,
520
+ "Ġ80": 467,
521
+ "Ġ62": 468,
522
+ "23": 469,
523
+ "82": 470,
524
+ "ĠH": 471,
525
+ "70": 472,
526
+ "Ġ100": 473,
527
+ "ĠR": 474,
528
+ "Ã¥Ã": 475,
529
+ "îÃ": 476,
530
+ "th": 477,
531
+ ")\"": 478,
532
+ "ig": 479,
533
+ "ad": 480,
534
+ "Ġ\"\"": 481,
535
+ "Ġ56": 482,
536
+ "ic": 483,
537
+ "Ġthe": 484,
538
+ "ow": 485,
539
+ "om": 486,
540
+ "Ġ87": 487,
541
+ "is": 488,
542
+ "Ġo": 489,
543
+ "Ġ74": 490,
544
+ "ed": 491,
545
+ "28": 492,
546
+ "ay": 493,
547
+ "ÃłÃ": 494,
548
+ "ĠP": 495,
549
+ "Ġof": 496,
550
+ "27": 497,
551
+ "ĠOf": 498,
552
+ "un": 499
553
+ },
554
+ "merges": [
555
+ "Ġ (",
556
+ "Ġ 0",
557
+ "Ġ 3",
558
+ "Ġ |",
559
+ "Ġ 1",
560
+ "Ġ r",
561
+ "Ġ 2",
562
+ "1 6",
563
+ "Ġ 5",
564
+ "Ġ 4",
565
+ "Ġ 7",
566
+ ") .",
567
+ "t u",
568
+ "Ġ :",
569
+ "Ġ 6",
570
+ "Ġ 8",
571
+ "} .",
572
+ "Ġ 9",
573
+ "Ġ1 2",
574
+ "p m",
575
+ "Ġ1 0",
576
+ "l r",
577
+ "} )",
578
+ "3 2",
579
+ "Ġ3 6",
580
+ "Ġ1 4",
581
+ "} ).",
582
+ "Ġ1 5",
583
+ "Ġ1 1",
584
+ "Ġ4 2",
585
+ "1 0",
586
+ "Ġ1 3",
587
+ "Ġ1 7",
588
+ "Ġ3 5",
589
+ "Ġ tu",
590
+ "Ġ3 8",
591
+ "1 2",
592
+ "Ġ4 0",
593
+ "b e",
594
+ "s t",
595
+ "Ġ1 6",
596
+ "Ġ4 6",
597
+ "4 2",
598
+ "4 6",
599
+ "Ġ1 9",
600
+ "i n",
601
+ "3 6",
602
+ "1 1",
603
+ "Ġ E",
604
+ "6 4",
605
+ "1 4",
606
+ "Ġ2 4",
607
+ "Ġ1 8",
608
+ ") }",
609
+ "Ġ2 0",
610
+ "l e",
611
+ "i t",
612
+ "Ġ D",
613
+ "Ġ \"",
614
+ "r e",
615
+ "t it",
616
+ "tit le",
617
+ "e n",
618
+ "in g",
619
+ "r a",
620
+ "t e",
621
+ "c k",
622
+ "Ġ A",
623
+ "u m",
624
+ "en t",
625
+ "m p",
626
+ "t s",
627
+ "n ing",
628
+ "ra ck",
629
+ "st r",
630
+ "t rack",
631
+ "f re",
632
+ "um ent",
633
+ "mp o",
634
+ "str ument",
635
+ "in strument",
636
+ "tu ning",
637
+ "te mpo",
638
+ "fre ts",
639
+ "Ġ G",
640
+ "1 5",
641
+ "4 9",
642
+ "3 8",
643
+ ") }.",
644
+ "1 3",
645
+ "a h",
646
+ "Ġ B",
647
+ "Ġ2 2",
648
+ "3 5",
649
+ "Ġ2 1",
650
+ "Ġ4 4",
651
+ "4 4",
652
+ "1 7",
653
+ "5 1",
654
+ "Ġ4 9",
655
+ "Ġ5 1",
656
+ "4 0",
657
+ "Ġ lr",
658
+ "5 7",
659
+ "Ġ4 1",
660
+ "Ġ v",
661
+ "Ġ be",
662
+ "Ġ5 4",
663
+ "Ġ4 5",
664
+ "Ġ5 7",
665
+ "Ġ4 3",
666
+ "Ġ g",
667
+ "n h",
668
+ "5 9",
669
+ "Ġ C",
670
+ "Ġ5 5",
671
+ "Ġ5 9",
672
+ "1 9",
673
+ "Ġ3 7",
674
+ "Ġ4 7",
675
+ "Ġ5 3",
676
+ "Ġ3 0",
677
+ "5 3",
678
+ "4 1",
679
+ "Ġ8 5",
680
+ "Ġ6 9",
681
+ "5 5",
682
+ ") })",
683
+ "4 5",
684
+ "1 8",
685
+ "Ġ4 8",
686
+ "Ġ2 3",
687
+ "Ġ8 2",
688
+ "2 0",
689
+ "Ġ7 0",
690
+ "h e",
691
+ "Ġ2 5",
692
+ "4 3",
693
+ "Ġ2 9",
694
+ "s h",
695
+ "ĠE b",
696
+ "Ġ pm",
697
+ "Ġ3 3",
698
+ "2 2",
699
+ "o n",
700
+ "3 9",
701
+ "Ġ6 4",
702
+ "Ġ2 7",
703
+ "Ġ F",
704
+ "ĠD b",
705
+ "Ġ T",
706
+ "Ġ12 0",
707
+ ") }).",
708
+ "5 4",
709
+ "ĠG b",
710
+ "a n",
711
+ "ĠA b",
712
+ "Ġ5 0",
713
+ "e r",
714
+ "Ġ3 9",
715
+ "Ġ2 8",
716
+ "Ġ3 1",
717
+ "3 3",
718
+ "Ġ S",
719
+ "Ġ7 3",
720
+ "Ġ6 3",
721
+ "a r",
722
+ "6 9",
723
+ "Ġ2 6",
724
+ "o u",
725
+ "2 1",
726
+ "s u",
727
+ "Ġ6 1",
728
+ "ĠB b",
729
+ "p h",
730
+ "Ġ h",
731
+ "2 4",
732
+ "b title",
733
+ "su btitle",
734
+ "o r",
735
+ "Ġ M",
736
+ "Ġ f",
737
+ "Ġ ah",
738
+ "4 7",
739
+ "Ġ3 2",
740
+ "Ġ st",
741
+ "4 8",
742
+ "Ġ t",
743
+ "Ġ Ã",
744
+ "Ġ O",
745
+ "3 7",
746
+ "a l",
747
+ "Ġ nh",
748
+ "Ġ3 4",
749
+ "5 2",
750
+ "m e",
751
+ "Ġ5 2",
752
+ "Ġ L",
753
+ "a t",
754
+ "Ã ¥",
755
+ "ĠT he",
756
+ "Ã ®",
757
+ "Ã ł",
758
+ "Ġ W",
759
+ "T he",
760
+ "e s",
761
+ "Ġ I",
762
+ "l l",
763
+ "v e",
764
+ "Ġ8 0",
765
+ "Ġ6 2",
766
+ "2 3",
767
+ "8 2",
768
+ "Ġ H",
769
+ "7 0",
770
+ "Ġ10 0",
771
+ "Ġ R",
772
+ "Ã¥ Ã",
773
+ "î Ã",
774
+ "t h",
775
+ ") \"",
776
+ "i g",
777
+ "a d",
778
+ "Ġ\" \"",
779
+ "Ġ5 6",
780
+ "i c",
781
+ "Ġt he",
782
+ "o w",
783
+ "o m",
784
+ "Ġ8 7",
785
+ "i s",
786
+ "Ġ o",
787
+ "Ġ7 4",
788
+ "e d",
789
+ "2 8",
790
+ "a y",
791
+ "Ãł Ã",
792
+ "Ġ P",
793
+ "Ġo f",
794
+ "2 7",
795
+ "ĠO f",
796
+ "u n"
797
+ ]
798
+ }
799
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "additional_special_tokens": [
4
+ "<|pad|>"
5
+ ],
6
+ "bos_token": "<|endoftext|>",
7
+ "eos_token": "<|endoftext|>",
8
+ "model_max_length": 1024,
9
+ "special_tokens_map_file": null,
10
+ "tokenizer_class": "GPT2Tokenizer",
11
+ "unk_token": "<|endoftext|>"
12
+ }
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<|endoftext|>":0,"<|pad|>":1,"!":2,"\"":3,"#":4,"$":5,"%":6,"&":7,"'":8,"(":9,")":10,"*":11,"+":12,",":13,"-":14,".":15,"/":16,"0":17,"1":18,"2":19,"3":20,"4":21,"5":22,"6":23,"7":24,"8":25,"9":26,":":27,";":28,"<":29,"=":30,">":31,"?":32,"@":33,"A":34,"B":35,"C":36,"D":37,"E":38,"F":39,"G":40,"H":41,"I":42,"J":43,"K":44,"L":45,"M":46,"N":47,"O":48,"P":49,"Q":50,"R":51,"S":52,"T":53,"U":54,"V":55,"W":56,"X":57,"Y":58,"Z":59,"[":60,"\\":61,"]":62,"^":63,"_":64,"`":65,"a":66,"b":67,"c":68,"d":69,"e":70,"f":71,"g":72,"h":73,"i":74,"j":75,"k":76,"l":77,"m":78,"n":79,"o":80,"p":81,"q":82,"r":83,"s":84,"t":85,"u":86,"v":87,"w":88,"x":89,"y":90,"z":91,"{":92,"|":93,"}":94,"~":95,"¡":96,"¢":97,"£":98,"¤":99,"¥":100,"¦":101,"§":102,"¨":103,"©":104,"ª":105,"«":106,"¬":107,"®":108,"¯":109,"°":110,"±":111,"²":112,"³":113,"´":114,"µ":115,"¶":116,"·":117,"¸":118,"¹":119,"º":120,"»":121,"¼":122,"½":123,"¾":124,"¿":125,"À":126,"Á":127,"Â":128,"Ã":129,"Ä":130,"Å":131,"Æ":132,"Ç":133,"È":134,"É":135,"Ê":136,"Ë":137,"Ì":138,"Í":139,"Î":140,"Ï":141,"Ð":142,"Ñ":143,"Ò":144,"Ó":145,"Ô":146,"Õ":147,"Ö":148,"×":149,"Ø":150,"Ù":151,"Ú":152,"Û":153,"Ü":154,"Ý":155,"Þ":156,"ß":157,"à":158,"á":159,"â":160,"ã":161,"ä":162,"å":163,"æ":164,"ç":165,"è":166,"é":167,"ê":168,"ë":169,"ì":170,"í":171,"î":172,"ï":173,"ð":174,"ñ":175,"ò":176,"ó":177,"ô":178,"õ":179,"ö":180,"÷":181,"ø":182,"ù":183,"ú":184,"û":185,"ü":186,"ý":187,"þ":188,"ÿ":189,"Ā":190,"ā":191,"Ă":192,"ă":193,"Ą":194,"ą":195,"Ć":196,"ć":197,"Ĉ":198,"ĉ":199,"Ċ":200,"ċ":201,"Č":202,"č":203,"Ď":204,"ď":205,"Đ":206,"đ":207,"Ē":208,"ē":209,"Ĕ":210,"ĕ":211,"Ė":212,"ė":213,"Ę":214,"ę":215,"Ě":216,"ě":217,"Ĝ":218,"ĝ":219,"Ğ":220,"ğ":221,"Ġ":222,"ġ":223,"Ģ":224,"ģ":225,"Ĥ":226,"ĥ":227,"Ħ":228,"ħ":229,"Ĩ":230,"ĩ":231,"Ī":232,"ī":233,"Ĭ":234,"ĭ":235,"Į":236,"į":237,"İ":238,"ı":239,"IJ":240,"ij":241,"Ĵ":242,"ĵ":243,"Ķ":244,"ķ":245,"ĸ":246,"Ĺ":247,"ĺ":248,"Ļ":249,"ļ":250,"Ľ":251,"ľ":252,"Ŀ":253,"ŀ":254,"Ł":255,"ł":256,"Ń":257,"Ġ(":258,"Ġ0":259,"Ġ3":260,"Ġ|":261,"Ġ1":262,"Ġr":263,"Ġ2":264,"16":265,"Ġ5":266,"Ġ4":267,"Ġ7":268,").":269,"tu":270,"Ġ:":271,"Ġ6":272,"Ġ8":273,"}.":274,"Ġ9":275,"Ġ12":276,"pm":277,"Ġ10":278,"lr":279,"})":280,"32":281,"Ġ36":282,"Ġ14":283,"}).":284,"Ġ15":285,"Ġ11":286,"Ġ42":287,"10":288,"Ġ13":289,"Ġ17":290,"Ġ35":291,"Ġtu":292,"Ġ38":293,"12":294,"Ġ40":295,"be":296,"st":297,"Ġ16":298,"Ġ46":299,"42":300,"46":301,"Ġ19":302,"in":303,"36":304,"11":305,"ĠE":306,"64":307,"14":308,"Ġ24":309,"Ġ18":310,")}":311,"Ġ20":312,"le":313,"it":314,"ĠD":315,"Ġ\"":316,"re":317,"tit":318,"title":319,"en":320,"ing":321,"ra":322,"te":323,"ck":324,"ĠA":325,"um":326,"ent":327,"mp":328,"ts":329,"ning":330,"rack":331,"str":332,"track":333,"fre":334,"ument":335,"mpo":336,"strument":337,"instrument":338,"tuning":339,"tempo":340,"frets":341,"ĠG":342,"15":343,"49":344,"38":345,")}.":346,"13":347,"ah":348,"ĠB":349,"Ġ22":350,"35":351,"Ġ21":352,"Ġ44":353,"44":354,"17":355,"51":356,"Ġ49":357,"Ġ51":358,"40":359,"Ġlr":360,"57":361,"Ġ41":362,"Ġv":363,"Ġbe":364,"Ġ54":365,"Ġ45":366,"Ġ57":367,"Ġ43":368,"Ġg":369,"nh":370,"59":371,"ĠC":372,"Ġ55":373,"Ġ59":374,"19":375,"Ġ37":376,"Ġ47":377,"Ġ53":378,"Ġ30":379,"53":380,"41":381,"Ġ85":382,"Ġ69":383,"55":384,")})":385,"45":386,"18":387,"Ġ48":388,"Ġ23":389,"Ġ82":390,"20":391,"Ġ70":392,"he":393,"Ġ25":394,"43":395,"Ġ29":396,"sh":397,"ĠEb":398,"Ġpm":399,"Ġ33":400,"22":401,"on":402,"39":403,"Ġ64":404,"Ġ27":405,"ĠF":406,"ĠDb":407,"ĠT":408,"Ġ120":409,")}).":410,"54":411,"ĠGb":412,"an":413,"ĠAb":414,"Ġ50":415,"er":416,"Ġ39":417,"Ġ28":418,"Ġ31":419,"33":420,"ĠS":421,"Ġ73":422,"Ġ63":423,"ar":424,"69":425,"Ġ26":426,"ou":427,"21":428,"su":429,"Ġ61":430,"ĠBb":431,"ph":432,"Ġh":433,"24":434,"btitle":435,"subtitle":436,"or":437,"ĠM":438,"Ġf":439,"Ġah":440,"47":441,"Ġ32":442,"Ġst":443,"48":444,"Ġt":445,"ĠÃ":446,"ĠO":447,"37":448,"al":449,"Ġnh":450,"Ġ34":451,"52":452,"me":453,"Ġ52":454,"ĠL":455,"at":456,"Ã¥":457,"ĠThe":458,"î":459,"Ãł":460,"ĠW":461,"The":462,"es":463,"ĠI":464,"ll":465,"ve":466,"Ġ80":467,"Ġ62":468,"23":469,"82":470,"ĠH":471,"70":472,"Ġ100":473,"ĠR":474,"Ã¥Ã":475,"îÃ":476,"th":477,")\"":478,"ig":479,"ad":480,"Ġ\"\"":481,"Ġ56":482,"ic":483,"Ġthe":484,"ow":485,"om":486,"Ġ87":487,"is":488,"Ġo":489,"Ġ74":490,"ed":491,"28":492,"ay":493,"ÃłÃ":494,"ĠP":495,"Ġof":496,"27":497,"ĠOf":498,"un":499}