Jenalea committed
Commit 10f70cb
1 Parent(s): 00e8e38

Upload tokenizer_config.json

Files changed (1)
  1. tokenizer_config.json +1 -119
tokenizer_config.json CHANGED
@@ -1,119 +1 @@
- {
-   "additional_special_tokens": [
-     "__af__",
-     "__am__",
-     "__ar__",
-     "__ast__",
-     "__az__",
-     "__ba__",
-     "__be__",
-     "__bg__",
-     "__bn__",
-     "__br__",
-     "__bs__",
-     "__ca__",
-     "__ceb__",
-     "__cs__",
-     "__cy__",
-     "__da__",
-     "__de__",
-     "__el__",
-     "__en__",
-     "__es__",
-     "__et__",
-     "__fa__",
-     "__ff__",
-     "__fi__",
-     "__fr__",
-     "__fy__",
-     "__ga__",
-     "__gd__",
-     "__gl__",
-     "__gu__",
-     "__ha__",
-     "__he__",
-     "__hi__",
-     "__hr__",
-     "__ht__",
-     "__hu__",
-     "__hy__",
-     "__id__",
-     "__ig__",
-     "__ilo__",
-     "__is__",
-     "__it__",
-     "__ja__",
-     "__jv__",
-     "__ka__",
-     "__kk__",
-     "__km__",
-     "__kn__",
-     "__ko__",
-     "__lb__",
-     "__lg__",
-     "__ln__",
-     "__lo__",
-     "__lt__",
-     "__lv__",
-     "__mg__",
-     "__mk__",
-     "__ml__",
-     "__mn__",
-     "__mr__",
-     "__ms__",
-     "__my__",
-     "__ne__",
-     "__nl__",
-     "__no__",
-     "__ns__",
-     "__oc__",
-     "__or__",
-     "__pa__",
-     "__pl__",
-     "__ps__",
-     "__pt__",
-     "__ro__",
-     "__ru__",
-     "__sd__",
-     "__si__",
-     "__sk__",
-     "__sl__",
-     "__so__",
-     "__sq__",
-     "__sr__",
-     "__ss__",
-     "__su__",
-     "__sv__",
-     "__sw__",
-     "__ta__",
-     "__th__",
-     "__tl__",
-     "__tn__",
-     "__tr__",
-     "__uk__",
-     "__ur__",
-     "__uz__",
-     "__vi__",
-     "__wo__",
-     "__xh__",
-     "__yi__",
-     "__yo__",
-     "__zh__",
-     "__zu__"
-   ],
-   "bos_token": "<s>",
-   "eos_token": "</s>",
-   "language_codes": "m2m100",
-   "model_max_length": 1024,
-   "name_or_path": "facebook/m2m100_418M",
-   "num_madeup_words": 8,
-   "pad_token": "<pad>",
-   "sep_token": "</s>",
-   "sp_model_kwargs": {},
-   "special_tokens_map_file": "m2m_100_1.2B_v2/special_tokens_map.json",
-   "src_lang": null,
-   "tgt_lang": null,
-   "tokenizer_class": "M2M100Tokenizer",
-   "tokenizer_file": null,
-   "unk_token": "<unk>"
- }
+ {"src_lang": null, "tgt_lang": null, "bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "special_tokens_map_file": "m2m_100_1.2B_v2/special_tokens_map.json", "tokenizer_file": null, "name_or_path": "m2m_100_1.2B_v2"}