|
{ |
|
"added_tokens_decoder": { |
|
"0": { |
|
"content": "<pad>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"1": { |
|
"content": "<unk>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"2": { |
|
"content": "<s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"3": { |
|
"content": "</s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256001": { |
|
"content": "__afr__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256002": { |
|
"content": "__amh__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256003": { |
|
"content": "__arb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256004": { |
|
"content": "__ary__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256005": { |
|
"content": "__arz__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256006": { |
|
"content": "__asm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256007": { |
|
"content": "__azj__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256008": { |
|
"content": "__bel__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256009": { |
|
"content": "__ben__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256010": { |
|
"content": "__bos__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256011": { |
|
"content": "__bul__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256012": { |
|
"content": "__cat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256013": { |
|
"content": "__ceb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256014": { |
|
"content": "__ces__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256015": { |
|
"content": "__ckb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256016": { |
|
"content": "__cmn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256017": { |
|
"content": "__cmn_Hant__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256018": { |
|
"content": "__cym__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256019": { |
|
"content": "__dan__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256020": { |
|
"content": "__deu__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256021": { |
|
"content": "__ell__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256022": { |
|
"content": "__eng__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256023": { |
|
"content": "__est__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256024": { |
|
"content": "__eus__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256025": { |
|
"content": "__fin__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256026": { |
|
"content": "__fra__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256027": { |
|
"content": "__fuv__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256028": { |
|
"content": "__gaz__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256029": { |
|
"content": "__gle__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256030": { |
|
"content": "__glg__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256031": { |
|
"content": "__guj__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256032": { |
|
"content": "__heb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256033": { |
|
"content": "__hin__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256034": { |
|
"content": "__hrv__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256035": { |
|
"content": "__hun__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256036": { |
|
"content": "__hye__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256037": { |
|
"content": "__ibo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256038": { |
|
"content": "__ind__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256039": { |
|
"content": "__isl__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256040": { |
|
"content": "__ita__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256041": { |
|
"content": "__jav__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256042": { |
|
"content": "__jpn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256043": { |
|
"content": "__kan__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256044": { |
|
"content": "__kat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256045": { |
|
"content": "__kaz__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256046": { |
|
"content": "__khk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256047": { |
|
"content": "__khm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256048": { |
|
"content": "__kir__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256049": { |
|
"content": "__kor__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256050": { |
|
"content": "__lao__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256051": { |
|
"content": "__lit__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256052": { |
|
"content": "__lug__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256053": { |
|
"content": "__luo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256054": { |
|
"content": "__lvs__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256055": { |
|
"content": "__mai__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256056": { |
|
"content": "__mal__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256057": { |
|
"content": "__mar__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256058": { |
|
"content": "__mkd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256059": { |
|
"content": "__mlt__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256060": { |
|
"content": "__mni__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256061": { |
|
"content": "__mya__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256062": { |
|
"content": "__nld__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256063": { |
|
"content": "__nno__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256064": { |
|
"content": "__nob__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256065": { |
|
"content": "__npi__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256066": { |
|
"content": "__nya__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256067": { |
|
"content": "__ory__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256068": { |
|
"content": "__pan__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256069": { |
|
"content": "__pbt__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256070": { |
|
"content": "__pes__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256071": { |
|
"content": "__pol__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256072": { |
|
"content": "__por__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256073": { |
|
"content": "__ron__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256074": { |
|
"content": "__rus__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256075": { |
|
"content": "__sat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256076": { |
|
"content": "__slk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256077": { |
|
"content": "__slv__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256078": { |
|
"content": "__sna__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256079": { |
|
"content": "__snd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256080": { |
|
"content": "__som__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256081": { |
|
"content": "__spa__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256082": { |
|
"content": "__srp__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256083": { |
|
"content": "__swe__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256084": { |
|
"content": "__swh__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256085": { |
|
"content": "__tam__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256086": { |
|
"content": "__tel__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256087": { |
|
"content": "__tgk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256088": { |
|
"content": "__tgl__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256089": { |
|
"content": "__tha__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256090": { |
|
"content": "__tur__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256091": { |
|
"content": "__ukr__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256092": { |
|
"content": "__urd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256093": { |
|
"content": "__uzn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256094": { |
|
"content": "__vie__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256095": { |
|
"content": "__yor__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256096": { |
|
"content": "__yue__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256097": { |
|
"content": "__zlm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256098": { |
|
"content": "__zul__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256099": { |
|
"content": "<MINED_DATA>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256100": { |
|
"content": "<MMT_BT_DATA>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256101": { |
|
"content": "<SMT_BT_DATA>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
} |
|
}, |
|
"additional_special_tokens": [ |
|
"__afr__", |
|
"__amh__", |
|
"__arb__", |
|
"__ary__", |
|
"__arz__", |
|
"__asm__", |
|
"__azj__", |
|
"__bel__", |
|
"__ben__", |
|
"__bos__", |
|
"__bul__", |
|
"__cat__", |
|
"__ceb__", |
|
"__ces__", |
|
"__ckb__", |
|
"__cmn__", |
|
"__cmn_Hant__", |
|
"__cym__", |
|
"__dan__", |
|
"__deu__", |
|
"__ell__", |
|
"__eng__", |
|
"__est__", |
|
"__eus__", |
|
"__fin__", |
|
"__fra__", |
|
"__fuv__", |
|
"__gaz__", |
|
"__gle__", |
|
"__glg__", |
|
"__guj__", |
|
"__heb__", |
|
"__hin__", |
|
"__hrv__", |
|
"__hun__", |
|
"__hye__", |
|
"__ibo__", |
|
"__ind__", |
|
"__isl__", |
|
"__ita__", |
|
"__jav__", |
|
"__jpn__", |
|
"__kan__", |
|
"__kat__", |
|
"__kaz__", |
|
"__khk__", |
|
"__khm__", |
|
"__kir__", |
|
"__kor__", |
|
"__lao__", |
|
"__lit__", |
|
"__lug__", |
|
"__luo__", |
|
"__lvs__", |
|
"__mai__", |
|
"__mal__", |
|
"__mar__", |
|
"__mkd__", |
|
"__mlt__", |
|
"__mni__", |
|
"__mya__", |
|
"__nld__", |
|
"__nno__", |
|
"__nob__", |
|
"__npi__", |
|
"__nya__", |
|
"__ory__", |
|
"__pan__", |
|
"__pbt__", |
|
"__pes__", |
|
"__pol__", |
|
"__por__", |
|
"__ron__", |
|
"__rus__", |
|
"__sat__", |
|
"__slk__", |
|
"__slv__", |
|
"__sna__", |
|
"__snd__", |
|
"__som__", |
|
"__spa__", |
|
"__srp__", |
|
"__swe__", |
|
"__swh__", |
|
"__tam__", |
|
"__tel__", |
|
"__tgk__", |
|
"__tgl__", |
|
"__tha__", |
|
"__tur__", |
|
"__ukr__", |
|
"__urd__", |
|
"__uzn__", |
|
"__vie__", |
|
"__yor__", |
|
"__yue__", |
|
"__zlm__", |
|
"__zul__", |
|
"<MINED_DATA>", |
|
"<MMT_BT_DATA>", |
|
"<SMT_BT_DATA>" |
|
], |
|
"bos_token": "<s>", |
|
"clean_up_tokenization_spaces": true, |
|
"cls_token": "<s>", |
|
"eos_token": "</s>", |
|
"language_code": [ |
|
"afr", |
|
"amh", |
|
"arb", |
|
"ary", |
|
"arz", |
|
"asm", |
|
"azj", |
|
"bel", |
|
"ben", |
|
"bos", |
|
"bul", |
|
"cat", |
|
"ceb", |
|
"ces", |
|
"ckb", |
|
"cmn", |
|
"cmn_Hant", |
|
"cym", |
|
"dan", |
|
"deu", |
|
"ell", |
|
"eng", |
|
"est", |
|
"eus", |
|
"fin", |
|
"fra", |
|
"fuv", |
|
"gaz", |
|
"gle", |
|
"glg", |
|
"guj", |
|
"heb", |
|
"hin", |
|
"hrv", |
|
"hun", |
|
"hye", |
|
"ibo", |
|
"ind", |
|
"isl", |
|
"ita", |
|
"jav", |
|
"jpn", |
|
"kan", |
|
"kat", |
|
"kaz", |
|
"khk", |
|
"khm", |
|
"kir", |
|
"kor", |
|
"lao", |
|
"lit", |
|
"lug", |
|
"luo", |
|
"lvs", |
|
"mai", |
|
"mal", |
|
"mar", |
|
"mkd", |
|
"mlt", |
|
"mni", |
|
"mya", |
|
"nld", |
|
"nno", |
|
"nob", |
|
"npi", |
|
"nya", |
|
"ory", |
|
"pan", |
|
"pbt", |
|
"pes", |
|
"pol", |
|
"por", |
|
"ron", |
|
"rus", |
|
"sat", |
|
"slk", |
|
"slv", |
|
"sna", |
|
"snd", |
|
"som", |
|
"spa", |
|
"srp", |
|
"swe", |
|
"swh", |
|
"tam", |
|
"tel", |
|
"tgk", |
|
"tgl", |
|
"tha", |
|
"tur", |
|
"ukr", |
|
"urd", |
|
"uzn", |
|
"vie", |
|
"yor", |
|
"yue", |
|
"zlm", |
|
"zul" |
|
], |
|
"model_max_length": 1000000000000000019884624838656, |
|
"pad_token": "<pad>", |
|
"processor_class": "SeamlessM4TProcessor", |
|
"sep_token": "</s>", |
|
"sp_model_kwargs": {}, |
|
"src_lang": "__eng__", |
|
"tgt_lang": "__fra__", |
|
"tokenizer_class": "SeamlessM4TTokenizer", |
|
"tokenizer_file": null, |
|
"unk_token": "<unk>" |
|
} |
|
|