Upload tokenizer
Browse files
- tokenizer.json +8 -8
- vocab.json +1 -1
tokenizer.json
CHANGED
@@ -14,7 +14,7 @@
|
|
14 |
},
|
15 |
{
|
16 |
"id": 1,
|
17 |
-
"content": "<|
|
18 |
"single_word": false,
|
19 |
"lstrip": false,
|
20 |
"rstrip": false,
|
@@ -23,7 +23,7 @@
|
|
23 |
},
|
24 |
{
|
25 |
"id": 2,
|
26 |
-
"content": "<|
|
27 |
"single_word": false,
|
28 |
"lstrip": false,
|
29 |
"rstrip": false,
|
@@ -32,7 +32,7 @@
|
|
32 |
},
|
33 |
{
|
34 |
"id": 3,
|
35 |
-
"content": "<|
|
36 |
"single_word": false,
|
37 |
"lstrip": false,
|
38 |
"rstrip": false,
|
@@ -41,7 +41,7 @@
|
|
41 |
},
|
42 |
{
|
43 |
"id": 4,
|
44 |
-
"content": "<|
|
45 |
"single_word": false,
|
46 |
"lstrip": false,
|
47 |
"rstrip": false,
|
@@ -326,10 +326,10 @@
|
|
326 |
"byte_fallback": false,
|
327 |
"vocab": {
|
328 |
"<|endoftext|>": 0,
|
329 |
-
"<|
|
330 |
-
"<|
|
331 |
-
"<|
|
332 |
-
"<|
|
333 |
"<|meter_0|>": 5,
|
334 |
"<|meter_1|>": 6,
|
335 |
"<|meter_2|>": 7,
|
|
|
14 |
},
|
15 |
{
|
16 |
"id": 1,
|
17 |
+
"content": "<|psep|>",
|
18 |
"single_word": false,
|
19 |
"lstrip": false,
|
20 |
"rstrip": false,
|
|
|
23 |
},
|
24 |
{
|
25 |
"id": 2,
|
26 |
+
"content": "<|vsep|>",
|
27 |
"single_word": false,
|
28 |
"lstrip": false,
|
29 |
"rstrip": false,
|
|
|
32 |
},
|
33 |
{
|
34 |
"id": 3,
|
35 |
+
"content": "<|bsep|>",
|
36 |
"single_word": false,
|
37 |
"lstrip": false,
|
38 |
"rstrip": false,
|
|
|
41 |
},
|
42 |
{
|
43 |
"id": 4,
|
44 |
+
"content": "<|pad|>",
|
45 |
"single_word": false,
|
46 |
"lstrip": false,
|
47 |
"rstrip": false,
|
|
|
326 |
"byte_fallback": false,
|
327 |
"vocab": {
|
328 |
"<|endoftext|>": 0,
|
329 |
+
"<|psep|>": 1,
|
330 |
+
"<|vsep|>": 2,
|
331 |
+
"<|bsep|>": 3,
|
332 |
+
"<|pad|>": 4,
|
333 |
"<|meter_0|>": 5,
|
334 |
"<|meter_1|>": 6,
|
335 |
"<|meter_2|>": 7,
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"<|endoftext|>":0,"<|
|
|
|
1 |
+
{"<|endoftext|>":0,"<|psep|>":1,"<|vsep|>":2,"<|bsep|>":3,"<|pad|>":4,"<|meter_0|>":5,"<|meter_1|>":6,"<|meter_2|>":7,"<|meter_3|>":8,"<|meter_4|>":9,"<|meter_5|>":10,"<|meter_6|>":11,"<|meter_7|>":12,"<|meter_8|>":13,"<|meter_9|>":14,"<|meter_10|>":15,"<|meter_11|>":16,"<|meter_12|>":17,"<|meter_13|>":18,"<|meter_14|>":19,"<|meter_15|>":20,"<|res_0|>":21,"<|res_1|>":22,"<|res_2|>":23,"<|res_3|>":24,"<|res_4|>":25,"<|res_5|>":26,"<|res_6|>":27,"<|res_7|>":28,"<|res_8|>":29,"<|res_9|>":30,"<|res_10|>":31,"<|res_11|>":32,"<|res_12|>":33," ":34,"0":35,"1":36,"2":37,"3":38,"4":39,"5":40,"6":41,"7":42,"8":43,"9":44,"<":45,">":46,"_":47,"b":48,"e":49,"m":50,"p":51,"r":52,"s":53,"t":54,"v":55,"|":56,"~":57,"ء":58,"أ":59,"ؤ":60,"ئ":61,"ا":62,"ب":63,"ة":64,"ت":65,"ث":66,"ج":67,"ح":68,"خ":69,"د":70,"ذ":71,"ر":72,"ز":73,"س":74,"ش":75,"ص":76,"ض":77,"ط":78,"ظ":79,"ع":80,"غ":81,"ف":82,"ق":83,"ك":84,"ل":85,"م":86,"ن":87,"ه":88,"و":89,"ى":90,"ي":91,"ً":92,"ٌ":93,"ٍ":94,"َ":95,"ُ":96,"ِ":97,"ّ":98,"ْ":99}
|