PereLluis13
committed on
Commit
•
373cf04
1
Parent(s):
1fedaa6
Upload tokenizer
Browse files
- added_tokens.json +104 -0
- special_tokens_map.json +112 -0
- spm.model +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +120 -0
added_tokens.json
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"--NME--": 128001,
|
3 |
+
"[E-0]": 128002,
|
4 |
+
"[E-10]": 128012,
|
5 |
+
"[E-11]": 128013,
|
6 |
+
"[E-12]": 128014,
|
7 |
+
"[E-13]": 128015,
|
8 |
+
"[E-14]": 128016,
|
9 |
+
"[E-15]": 128017,
|
10 |
+
"[E-16]": 128018,
|
11 |
+
"[E-17]": 128019,
|
12 |
+
"[E-18]": 128020,
|
13 |
+
"[E-19]": 128021,
|
14 |
+
"[E-1]": 128003,
|
15 |
+
"[E-20]": 128022,
|
16 |
+
"[E-21]": 128023,
|
17 |
+
"[E-22]": 128024,
|
18 |
+
"[E-23]": 128025,
|
19 |
+
"[E-24]": 128026,
|
20 |
+
"[E-25]": 128027,
|
21 |
+
"[E-26]": 128028,
|
22 |
+
"[E-27]": 128029,
|
23 |
+
"[E-28]": 128030,
|
24 |
+
"[E-29]": 128031,
|
25 |
+
"[E-2]": 128004,
|
26 |
+
"[E-30]": 128032,
|
27 |
+
"[E-31]": 128033,
|
28 |
+
"[E-32]": 128034,
|
29 |
+
"[E-33]": 128035,
|
30 |
+
"[E-34]": 128036,
|
31 |
+
"[E-35]": 128037,
|
32 |
+
"[E-36]": 128038,
|
33 |
+
"[E-37]": 128039,
|
34 |
+
"[E-38]": 128040,
|
35 |
+
"[E-39]": 128041,
|
36 |
+
"[E-3]": 128005,
|
37 |
+
"[E-40]": 128042,
|
38 |
+
"[E-41]": 128043,
|
39 |
+
"[E-42]": 128044,
|
40 |
+
"[E-43]": 128045,
|
41 |
+
"[E-44]": 128046,
|
42 |
+
"[E-45]": 128047,
|
43 |
+
"[E-46]": 128048,
|
44 |
+
"[E-47]": 128049,
|
45 |
+
"[E-48]": 128050,
|
46 |
+
"[E-49]": 128051,
|
47 |
+
"[E-4]": 128006,
|
48 |
+
"[E-50]": 128052,
|
49 |
+
"[E-51]": 128053,
|
50 |
+
"[E-52]": 128054,
|
51 |
+
"[E-53]": 128055,
|
52 |
+
"[E-54]": 128056,
|
53 |
+
"[E-55]": 128057,
|
54 |
+
"[E-56]": 128058,
|
55 |
+
"[E-57]": 128059,
|
56 |
+
"[E-58]": 128060,
|
57 |
+
"[E-59]": 128061,
|
58 |
+
"[E-5]": 128007,
|
59 |
+
"[E-60]": 128062,
|
60 |
+
"[E-61]": 128063,
|
61 |
+
"[E-62]": 128064,
|
62 |
+
"[E-63]": 128065,
|
63 |
+
"[E-64]": 128066,
|
64 |
+
"[E-65]": 128067,
|
65 |
+
"[E-66]": 128068,
|
66 |
+
"[E-67]": 128069,
|
67 |
+
"[E-68]": 128070,
|
68 |
+
"[E-69]": 128071,
|
69 |
+
"[E-6]": 128008,
|
70 |
+
"[E-70]": 128072,
|
71 |
+
"[E-71]": 128073,
|
72 |
+
"[E-72]": 128074,
|
73 |
+
"[E-73]": 128075,
|
74 |
+
"[E-74]": 128076,
|
75 |
+
"[E-75]": 128077,
|
76 |
+
"[E-76]": 128078,
|
77 |
+
"[E-77]": 128079,
|
78 |
+
"[E-78]": 128080,
|
79 |
+
"[E-79]": 128081,
|
80 |
+
"[E-7]": 128009,
|
81 |
+
"[E-80]": 128082,
|
82 |
+
"[E-81]": 128083,
|
83 |
+
"[E-82]": 128084,
|
84 |
+
"[E-83]": 128085,
|
85 |
+
"[E-84]": 128086,
|
86 |
+
"[E-85]": 128087,
|
87 |
+
"[E-86]": 128088,
|
88 |
+
"[E-87]": 128089,
|
89 |
+
"[E-88]": 128090,
|
90 |
+
"[E-89]": 128091,
|
91 |
+
"[E-8]": 128010,
|
92 |
+
"[E-90]": 128092,
|
93 |
+
"[E-91]": 128093,
|
94 |
+
"[E-92]": 128094,
|
95 |
+
"[E-93]": 128095,
|
96 |
+
"[E-94]": 128096,
|
97 |
+
"[E-95]": 128097,
|
98 |
+
"[E-96]": 128098,
|
99 |
+
"[E-97]": 128099,
|
100 |
+
"[E-98]": 128100,
|
101 |
+
"[E-99]": 128101,
|
102 |
+
"[E-9]": 128011,
|
103 |
+
"[MASK]": 128000
|
104 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"--NME--",
|
4 |
+
"[E-0]",
|
5 |
+
"[E-1]",
|
6 |
+
"[E-2]",
|
7 |
+
"[E-3]",
|
8 |
+
"[E-4]",
|
9 |
+
"[E-5]",
|
10 |
+
"[E-6]",
|
11 |
+
"[E-7]",
|
12 |
+
"[E-8]",
|
13 |
+
"[E-9]",
|
14 |
+
"[E-10]",
|
15 |
+
"[E-11]",
|
16 |
+
"[E-12]",
|
17 |
+
"[E-13]",
|
18 |
+
"[E-14]",
|
19 |
+
"[E-15]",
|
20 |
+
"[E-16]",
|
21 |
+
"[E-17]",
|
22 |
+
"[E-18]",
|
23 |
+
"[E-19]",
|
24 |
+
"[E-20]",
|
25 |
+
"[E-21]",
|
26 |
+
"[E-22]",
|
27 |
+
"[E-23]",
|
28 |
+
"[E-24]",
|
29 |
+
"[E-25]",
|
30 |
+
"[E-26]",
|
31 |
+
"[E-27]",
|
32 |
+
"[E-28]",
|
33 |
+
"[E-29]",
|
34 |
+
"[E-30]",
|
35 |
+
"[E-31]",
|
36 |
+
"[E-32]",
|
37 |
+
"[E-33]",
|
38 |
+
"[E-34]",
|
39 |
+
"[E-35]",
|
40 |
+
"[E-36]",
|
41 |
+
"[E-37]",
|
42 |
+
"[E-38]",
|
43 |
+
"[E-39]",
|
44 |
+
"[E-40]",
|
45 |
+
"[E-41]",
|
46 |
+
"[E-42]",
|
47 |
+
"[E-43]",
|
48 |
+
"[E-44]",
|
49 |
+
"[E-45]",
|
50 |
+
"[E-46]",
|
51 |
+
"[E-47]",
|
52 |
+
"[E-48]",
|
53 |
+
"[E-49]",
|
54 |
+
"[E-50]",
|
55 |
+
"[E-51]",
|
56 |
+
"[E-52]",
|
57 |
+
"[E-53]",
|
58 |
+
"[E-54]",
|
59 |
+
"[E-55]",
|
60 |
+
"[E-56]",
|
61 |
+
"[E-57]",
|
62 |
+
"[E-58]",
|
63 |
+
"[E-59]",
|
64 |
+
"[E-60]",
|
65 |
+
"[E-61]",
|
66 |
+
"[E-62]",
|
67 |
+
"[E-63]",
|
68 |
+
"[E-64]",
|
69 |
+
"[E-65]",
|
70 |
+
"[E-66]",
|
71 |
+
"[E-67]",
|
72 |
+
"[E-68]",
|
73 |
+
"[E-69]",
|
74 |
+
"[E-70]",
|
75 |
+
"[E-71]",
|
76 |
+
"[E-72]",
|
77 |
+
"[E-73]",
|
78 |
+
"[E-74]",
|
79 |
+
"[E-75]",
|
80 |
+
"[E-76]",
|
81 |
+
"[E-77]",
|
82 |
+
"[E-78]",
|
83 |
+
"[E-79]",
|
84 |
+
"[E-80]",
|
85 |
+
"[E-81]",
|
86 |
+
"[E-82]",
|
87 |
+
"[E-83]",
|
88 |
+
"[E-84]",
|
89 |
+
"[E-85]",
|
90 |
+
"[E-86]",
|
91 |
+
"[E-87]",
|
92 |
+
"[E-88]",
|
93 |
+
"[E-89]",
|
94 |
+
"[E-90]",
|
95 |
+
"[E-91]",
|
96 |
+
"[E-92]",
|
97 |
+
"[E-93]",
|
98 |
+
"[E-94]",
|
99 |
+
"[E-95]",
|
100 |
+
"[E-96]",
|
101 |
+
"[E-97]",
|
102 |
+
"[E-98]",
|
103 |
+
"[E-99]"
|
104 |
+
],
|
105 |
+
"bos_token": "[CLS]",
|
106 |
+
"cls_token": "[CLS]",
|
107 |
+
"eos_token": "[SEP]",
|
108 |
+
"mask_token": "[MASK]",
|
109 |
+
"pad_token": "[PAD]",
|
110 |
+
"sep_token": "[SEP]",
|
111 |
+
"unk_token": "[UNK]"
|
112 |
+
}
|
spm.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
|
3 |
+
size 2464616
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": true,
|
3 |
+
"additional_special_tokens": [
|
4 |
+
"--NME--",
|
5 |
+
"[E-0]",
|
6 |
+
"[E-1]",
|
7 |
+
"[E-2]",
|
8 |
+
"[E-3]",
|
9 |
+
"[E-4]",
|
10 |
+
"[E-5]",
|
11 |
+
"[E-6]",
|
12 |
+
"[E-7]",
|
13 |
+
"[E-8]",
|
14 |
+
"[E-9]",
|
15 |
+
"[E-10]",
|
16 |
+
"[E-11]",
|
17 |
+
"[E-12]",
|
18 |
+
"[E-13]",
|
19 |
+
"[E-14]",
|
20 |
+
"[E-15]",
|
21 |
+
"[E-16]",
|
22 |
+
"[E-17]",
|
23 |
+
"[E-18]",
|
24 |
+
"[E-19]",
|
25 |
+
"[E-20]",
|
26 |
+
"[E-21]",
|
27 |
+
"[E-22]",
|
28 |
+
"[E-23]",
|
29 |
+
"[E-24]",
|
30 |
+
"[E-25]",
|
31 |
+
"[E-26]",
|
32 |
+
"[E-27]",
|
33 |
+
"[E-28]",
|
34 |
+
"[E-29]",
|
35 |
+
"[E-30]",
|
36 |
+
"[E-31]",
|
37 |
+
"[E-32]",
|
38 |
+
"[E-33]",
|
39 |
+
"[E-34]",
|
40 |
+
"[E-35]",
|
41 |
+
"[E-36]",
|
42 |
+
"[E-37]",
|
43 |
+
"[E-38]",
|
44 |
+
"[E-39]",
|
45 |
+
"[E-40]",
|
46 |
+
"[E-41]",
|
47 |
+
"[E-42]",
|
48 |
+
"[E-43]",
|
49 |
+
"[E-44]",
|
50 |
+
"[E-45]",
|
51 |
+
"[E-46]",
|
52 |
+
"[E-47]",
|
53 |
+
"[E-48]",
|
54 |
+
"[E-49]",
|
55 |
+
"[E-50]",
|
56 |
+
"[E-51]",
|
57 |
+
"[E-52]",
|
58 |
+
"[E-53]",
|
59 |
+
"[E-54]",
|
60 |
+
"[E-55]",
|
61 |
+
"[E-56]",
|
62 |
+
"[E-57]",
|
63 |
+
"[E-58]",
|
64 |
+
"[E-59]",
|
65 |
+
"[E-60]",
|
66 |
+
"[E-61]",
|
67 |
+
"[E-62]",
|
68 |
+
"[E-63]",
|
69 |
+
"[E-64]",
|
70 |
+
"[E-65]",
|
71 |
+
"[E-66]",
|
72 |
+
"[E-67]",
|
73 |
+
"[E-68]",
|
74 |
+
"[E-69]",
|
75 |
+
"[E-70]",
|
76 |
+
"[E-71]",
|
77 |
+
"[E-72]",
|
78 |
+
"[E-73]",
|
79 |
+
"[E-74]",
|
80 |
+
"[E-75]",
|
81 |
+
"[E-76]",
|
82 |
+
"[E-77]",
|
83 |
+
"[E-78]",
|
84 |
+
"[E-79]",
|
85 |
+
"[E-80]",
|
86 |
+
"[E-81]",
|
87 |
+
"[E-82]",
|
88 |
+
"[E-83]",
|
89 |
+
"[E-84]",
|
90 |
+
"[E-85]",
|
91 |
+
"[E-86]",
|
92 |
+
"[E-87]",
|
93 |
+
"[E-88]",
|
94 |
+
"[E-89]",
|
95 |
+
"[E-90]",
|
96 |
+
"[E-91]",
|
97 |
+
"[E-92]",
|
98 |
+
"[E-93]",
|
99 |
+
"[E-94]",
|
100 |
+
"[E-95]",
|
101 |
+
"[E-96]",
|
102 |
+
"[E-97]",
|
103 |
+
"[E-98]",
|
104 |
+
"[E-99]"
|
105 |
+
],
|
106 |
+
"bos_token": "[CLS]",
|
107 |
+
"clean_up_tokenization_spaces": true,
|
108 |
+
"cls_token": "[CLS]",
|
109 |
+
"do_lower_case": false,
|
110 |
+
"eos_token": "[SEP]",
|
111 |
+
"mask_token": "[MASK]",
|
112 |
+
"model_max_length": 1000000000000000019884624838656,
|
113 |
+
"pad_token": "[PAD]",
|
114 |
+
"sep_token": "[SEP]",
|
115 |
+
"sp_model_kwargs": {},
|
116 |
+
"split_by_punct": false,
|
117 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
118 |
+
"unk_token": "[UNK]",
|
119 |
+
"vocab_type": "spm"
|
120 |
+
}
|