Spaces:
Running
Running
class NllbLang:
    """One language entry, optionally paired with a Whisper language.

    Attributes:
        code: Language code of the entry (e.g. 'eng_Latn').
        name: Human-readable name of the entry (e.g. 'English').
        code_whisper: Whisper language code for the entry, or None when
            no Whisper counterpart was supplied.
        name_whisper: Whisper language name for the entry, or None when
            no Whisper counterpart was supplied.
    """

    def __init__(self, code, name, code_whisper=None, name_whisper=None):
        self.code = code
        self.name = name
        self.code_whisper = code_whisper
        self.name_whisper = name_whisper

    def __str__(self):
        # Short form: only the primary code and name are shown.
        return "Language(code={code}, name={name})".format(
            code=self.code, name=self.name
        )

    def __repr__(self):
        # Full form, so that lists and dicts of entries are readable when
        # printed or inspected in a debugger.
        return (
            "{cls}(code={code!r}, name={name!r}, "
            "code_whisper={code_whisper!r}, name_whisper={name_whisper!r})"
        ).format(
            cls=type(self).__name__,
            code=self.code,
            name=self.name,
            code_whisper=self.code_whisper,
            name_whisper=self.name_whisper,
        )
# Table of supported languages. Each entry is built as
#   NllbLang(code, name[, code_whisper, name_whisper])
# Entries given only two arguments have no Whisper counterpart, so their
# code_whisper / name_whisper stay None.
NLLB_LANGS = [
    NllbLang('ace_Arab', 'Acehnese (Arabic script)'),
    NllbLang('ace_Latn', 'Acehnese (Latin script)'),
    NllbLang('acm_Arab', 'Mesopotamian Arabic', 'ar', 'Arabic'),
    NllbLang('acq_Arab', 'Ta’izzi-Adeni Arabic', 'ar', 'Arabic'),
    NllbLang('aeb_Arab', 'Tunisian Arabic'),
    # Whisper's code for Afrikaans is 'af'; 'am' belongs to Amharic below.
    NllbLang('afr_Latn', 'Afrikaans', 'af', 'Afrikaans'),
    NllbLang('ajp_Arab', 'South Levantine Arabic', 'ar', 'Arabic'),
    NllbLang('aka_Latn', 'Akan'),
    NllbLang('amh_Ethi', 'Amharic', 'am', 'Amharic'),
    NllbLang('apc_Arab', 'North Levantine Arabic', 'ar', 'Arabic'),
    NllbLang('arb_Arab', 'Modern Standard Arabic', 'ar', 'Arabic'),
    NllbLang('arb_Latn', 'Modern Standard Arabic (Romanized)'),
    NllbLang('ars_Arab', 'Najdi Arabic', 'ar', 'Arabic'),
    NllbLang('ary_Arab', 'Moroccan Arabic', 'ar', 'Arabic'),
    NllbLang('arz_Arab', 'Egyptian Arabic', 'ar', 'Arabic'),
    NllbLang('asm_Beng', 'Assamese', 'as', 'Assamese'),
    NllbLang('ast_Latn', 'Asturian'),
    NllbLang('awa_Deva', 'Awadhi'),
    NllbLang('ayr_Latn', 'Central Aymara'),
    NllbLang('azb_Arab', 'South Azerbaijani', 'az', 'Azerbaijani'),
    NllbLang('azj_Latn', 'North Azerbaijani', 'az', 'Azerbaijani'),
    NllbLang('bak_Cyrl', 'Bashkir', 'ba', 'Bashkir'),
    NllbLang('bam_Latn', 'Bambara'),
    NllbLang('ban_Latn', 'Balinese'),
    NllbLang('bel_Cyrl', 'Belarusian', 'be', 'Belarusian'),
    NllbLang('bem_Latn', 'Bemba'),
    NllbLang('ben_Beng', 'Bengali', 'bn', 'Bengali'),
    NllbLang('bho_Deva', 'Bhojpuri'),
    NllbLang('bjn_Arab', 'Banjar (Arabic script)'),
    NllbLang('bjn_Latn', 'Banjar (Latin script)'),
    NllbLang('bod_Tibt', 'Standard Tibetan', 'bo', 'Tibetan'),
    NllbLang('bos_Latn', 'Bosnian', 'bs', 'Bosnian'),
    NllbLang('bug_Latn', 'Buginese'),
    NllbLang('bul_Cyrl', 'Bulgarian', 'bg', 'Bulgarian'),
    NllbLang('cat_Latn', 'Catalan', 'ca', 'Catalan'),
    NllbLang('ceb_Latn', 'Cebuano'),
    NllbLang('ces_Latn', 'Czech', 'cs', 'Czech'),
    NllbLang('cjk_Latn', 'Chokwe'),
    NllbLang('ckb_Arab', 'Central Kurdish'),
    NllbLang('crh_Latn', 'Crimean Tatar'),
    NllbLang('cym_Latn', 'Welsh', 'cy', 'Welsh'),
    NllbLang('dan_Latn', 'Danish', 'da', 'Danish'),
    NllbLang('deu_Latn', 'German', 'de', 'German'),
    NllbLang('dik_Latn', 'Southwestern Dinka'),
    NllbLang('dyu_Latn', 'Dyula'),
    NllbLang('dzo_Tibt', 'Dzongkha'),
    NllbLang('ell_Grek', 'Greek', 'el', 'Greek'),
    NllbLang('eng_Latn', 'English', 'en', 'English'),
    NllbLang('epo_Latn', 'Esperanto'),
    NllbLang('est_Latn', 'Estonian', 'et', 'Estonian'),
    NllbLang('eus_Latn', 'Basque', 'eu', 'Basque'),
    NllbLang('ewe_Latn', 'Ewe'),
    NllbLang('fao_Latn', 'Faroese', 'fo', 'Faroese'),
    NllbLang('fij_Latn', 'Fijian'),
    NllbLang('fin_Latn', 'Finnish', 'fi', 'Finnish'),
    NllbLang('fon_Latn', 'Fon'),
    NllbLang('fra_Latn', 'French', 'fr', 'French'),
    NllbLang('fur_Latn', 'Friulian'),
    NllbLang('fuv_Latn', 'Nigerian Fulfulde'),
    NllbLang('gla_Latn', 'Scottish Gaelic'),
    NllbLang('gle_Latn', 'Irish'),
    NllbLang('glg_Latn', 'Galician', 'gl', 'Galician'),
    NllbLang('grn_Latn', 'Guarani'),
    NllbLang('guj_Gujr', 'Gujarati', 'gu', 'Gujarati'),
    NllbLang('hat_Latn', 'Haitian Creole', 'ht', 'Haitian creole'),
    NllbLang('hau_Latn', 'Hausa', 'ha', 'Hausa'),
    NllbLang('heb_Hebr', 'Hebrew', 'he', 'Hebrew'),
    NllbLang('hin_Deva', 'Hindi', 'hi', 'Hindi'),
    NllbLang('hne_Deva', 'Chhattisgarhi'),
    NllbLang('hrv_Latn', 'Croatian', 'hr', 'Croatian'),
    NllbLang('hun_Latn', 'Hungarian', 'hu', 'Hungarian'),
    NllbLang('hye_Armn', 'Armenian', 'hy', 'Armenian'),
    NllbLang('ibo_Latn', 'Igbo'),
    NllbLang('ilo_Latn', 'Ilocano'),
    NllbLang('ind_Latn', 'Indonesian', 'id', 'Indonesian'),
    NllbLang('isl_Latn', 'Icelandic', 'is', 'Icelandic'),
    NllbLang('ita_Latn', 'Italian', 'it', 'Italian'),
    NllbLang('jav_Latn', 'Javanese', 'jw', 'Javanese'),
    NllbLang('jpn_Jpan', 'Japanese', 'ja', 'Japanese'),
    NllbLang('kab_Latn', 'Kabyle'),
    NllbLang('kac_Latn', 'Jingpho'),
    NllbLang('kam_Latn', 'Kamba'),
    NllbLang('kan_Knda', 'Kannada', 'kn', 'Kannada'),
    NllbLang('kas_Arab', 'Kashmiri (Arabic script)'),
    NllbLang('kas_Deva', 'Kashmiri (Devanagari script)'),
    NllbLang('kat_Geor', 'Georgian', 'ka', 'Georgian'),
    NllbLang('knc_Arab', 'Central Kanuri (Arabic script)'),
    NllbLang('knc_Latn', 'Central Kanuri (Latin script)'),
    NllbLang('kaz_Cyrl', 'Kazakh', 'kk', 'Kazakh'),
    NllbLang('kbp_Latn', 'Kabiyè'),
    NllbLang('kea_Latn', 'Kabuverdianu'),
    NllbLang('khm_Khmr', 'Khmer', 'km', 'Khmer'),
    NllbLang('kik_Latn', 'Kikuyu'),
    NllbLang('kin_Latn', 'Kinyarwanda'),
    NllbLang('kir_Cyrl', 'Kyrgyz'),
    NllbLang('kmb_Latn', 'Kimbundu'),
    NllbLang('kmr_Latn', 'Northern Kurdish'),
    NllbLang('kon_Latn', 'Kikongo'),
    NllbLang('kor_Hang', 'Korean', 'ko', 'Korean'),
    NllbLang('lao_Laoo', 'Lao', 'lo', 'Lao'),
    NllbLang('lij_Latn', 'Ligurian'),
    NllbLang('lim_Latn', 'Limburgish'),
    NllbLang('lin_Latn', 'Lingala', 'ln', 'Lingala'),
    NllbLang('lit_Latn', 'Lithuanian', 'lt', 'Lithuanian'),
    NllbLang('lmo_Latn', 'Lombard'),
    NllbLang('ltg_Latn', 'Latgalian'),
    NllbLang('ltz_Latn', 'Luxembourgish', 'lb', 'Luxembourgish'),
    NllbLang('lua_Latn', 'Luba-Kasai'),
    NllbLang('lug_Latn', 'Ganda'),
    NllbLang('luo_Latn', 'Luo'),
    NllbLang('lus_Latn', 'Mizo'),
    NllbLang('lvs_Latn', 'Standard Latvian', 'lv', 'Latvian'),
    NllbLang('mag_Deva', 'Magahi'),
    NllbLang('mai_Deva', 'Maithili'),
    NllbLang('mal_Mlym', 'Malayalam', 'ml', 'Malayalam'),
    NllbLang('mar_Deva', 'Marathi', 'mr', 'Marathi'),
    NllbLang('min_Arab', 'Minangkabau (Arabic script)'),
    NllbLang('min_Latn', 'Minangkabau (Latin script)'),
    NllbLang('mkd_Cyrl', 'Macedonian', 'mk', 'Macedonian'),
    NllbLang('plt_Latn', 'Plateau Malagasy', 'mg', 'Malagasy'),
    NllbLang('mlt_Latn', 'Maltese', 'mt', 'Maltese'),
    NllbLang('mni_Beng', 'Meitei (Bengali script)'),
    NllbLang('khk_Cyrl', 'Halh Mongolian', 'mn', 'Mongolian'),
    NllbLang('mos_Latn', 'Mossi'),
    NllbLang('mri_Latn', 'Maori', 'mi', 'Maori'),
    NllbLang('mya_Mymr', 'Burmese', 'my', 'Myanmar'),
    NllbLang('nld_Latn', 'Dutch', 'nl', 'Dutch'),
    NllbLang('nno_Latn', 'Norwegian Nynorsk', 'nn', 'Nynorsk'),
    NllbLang('nob_Latn', 'Norwegian Bokmål', 'no', 'Norwegian'),
    NllbLang('npi_Deva', 'Nepali', 'ne', 'Nepali'),
    NllbLang('nso_Latn', 'Northern Sotho'),
    NllbLang('nus_Latn', 'Nuer'),
    NllbLang('nya_Latn', 'Nyanja'),
    NllbLang('oci_Latn', 'Occitan', 'oc', 'Occitan'),
    NllbLang('gaz_Latn', 'West Central Oromo'),
    NllbLang('ory_Orya', 'Odia'),
    NllbLang('pag_Latn', 'Pangasinan'),
    NllbLang('pan_Guru', 'Eastern Panjabi', 'pa', 'Punjabi'),
    NllbLang('pap_Latn', 'Papiamento'),
    NllbLang('pes_Arab', 'Western Persian', 'fa', 'Persian'),
    NllbLang('pol_Latn', 'Polish', 'pl', 'Polish'),
    NllbLang('por_Latn', 'Portuguese', 'pt', 'Portuguese'),
    NllbLang('prs_Arab', 'Dari'),
    NllbLang('pbt_Arab', 'Southern Pashto', 'ps', 'Pashto'),
    NllbLang('quy_Latn', 'Ayacucho Quechua'),
    NllbLang('ron_Latn', 'Romanian', 'ro', 'Romanian'),
    NllbLang('run_Latn', 'Rundi'),
    NllbLang('rus_Cyrl', 'Russian', 'ru', 'Russian'),
    NllbLang('sag_Latn', 'Sango'),
    NllbLang('san_Deva', 'Sanskrit', 'sa', 'Sanskrit'),
    NllbLang('sat_Olck', 'Santali'),
    NllbLang('scn_Latn', 'Sicilian'),
    NllbLang('shn_Mymr', 'Shan'),
    NllbLang('sin_Sinh', 'Sinhala', 'si', 'Sinhala'),
    NllbLang('slk_Latn', 'Slovak', 'sk', 'Slovak'),
    NllbLang('slv_Latn', 'Slovenian', 'sl', 'Slovenian'),
    NllbLang('smo_Latn', 'Samoan'),
    NllbLang('sna_Latn', 'Shona', 'sn', 'Shona'),
    NllbLang('snd_Arab', 'Sindhi', 'sd', 'Sindhi'),
    NllbLang('som_Latn', 'Somali', 'so', 'Somali'),
    NllbLang('sot_Latn', 'Southern Sotho'),
    NllbLang('spa_Latn', 'Spanish', 'es', 'Spanish'),
    NllbLang('als_Latn', 'Tosk Albanian', 'sq', 'Albanian'),
    NllbLang('srd_Latn', 'Sardinian'),
    NllbLang('srp_Cyrl', 'Serbian', 'sr', 'Serbian'),
    NllbLang('ssw_Latn', 'Swati'),
    NllbLang('sun_Latn', 'Sundanese', 'su', 'Sundanese'),
    NllbLang('swe_Latn', 'Swedish', 'sv', 'Swedish'),
    NllbLang('swh_Latn', 'Swahili', 'sw', 'Swahili'),
    NllbLang('szl_Latn', 'Silesian'),
    NllbLang('tam_Taml', 'Tamil', 'ta', 'Tamil'),
    NllbLang('tat_Cyrl', 'Tatar', 'tt', 'Tatar'),
    NllbLang('tel_Telu', 'Telugu', 'te', 'Telugu'),
    NllbLang('tgk_Cyrl', 'Tajik', 'tg', 'Tajik'),
    NllbLang('tgl_Latn', 'Tagalog', 'tl', 'Tagalog'),
    NllbLang('tha_Thai', 'Thai', 'th', 'Thai'),
    NllbLang('tir_Ethi', 'Tigrinya'),
    NllbLang('taq_Latn', 'Tamasheq (Latin script)'),
    NllbLang('taq_Tfng', 'Tamasheq (Tifinagh script)'),
    NllbLang('tpi_Latn', 'Tok Pisin'),
    NllbLang('tsn_Latn', 'Tswana'),
    NllbLang('tso_Latn', 'Tsonga'),
    NllbLang('tuk_Latn', 'Turkmen', 'tk', 'Turkmen'),
    NllbLang('tum_Latn', 'Tumbuka'),
    NllbLang('tur_Latn', 'Turkish', 'tr', 'Turkish'),
    NllbLang('twi_Latn', 'Twi'),
    NllbLang('tzm_Tfng', 'Central Atlas Tamazight'),
    NllbLang('uig_Arab', 'Uyghur'),
    NllbLang('ukr_Cyrl', 'Ukrainian', 'uk', 'Ukrainian'),
    NllbLang('umb_Latn', 'Umbundu'),
    NllbLang('urd_Arab', 'Urdu', 'ur', 'Urdu'),
    NllbLang('uzn_Latn', 'Northern Uzbek', 'uz', 'Uzbek'),
    NllbLang('vec_Latn', 'Venetian'),
    NllbLang('vie_Latn', 'Vietnamese', 'vi', 'Vietnamese'),
    NllbLang('war_Latn', 'Waray'),
    NllbLang('wol_Latn', 'Wolof'),
    NllbLang('xho_Latn', 'Xhosa'),
    NllbLang('ydd_Hebr', 'Eastern Yiddish', 'yi', 'Yiddish'),
    NllbLang('yor_Latn', 'Yoruba', 'yo', 'Yoruba'),
    NllbLang('yue_Hant', 'Yue Chinese', 'zh', 'Chinese'),
    NllbLang('zho_Hans', 'Chinese (Simplified)', 'zh', 'Chinese'),
    NllbLang('zho_Hant', 'Chinese (Traditional)', 'zh', 'Chinese'),
    NllbLang('zsm_Latn', 'Standard Malay', 'ms', 'Malay'),
    NllbLang('zul_Latn', 'Zulu'),
]
# Lookup tables derived from NLLB_LANGS, one per attribute. Every key is
# lower-cased, and entries whose attribute is None are left out. When several
# entries yield the same key (e.g. the many entries sharing the Whisper code
# 'ar'), the entry that comes last in NLLB_LANGS is the one that is kept.
_TO_NLLB_LANG_CODE = {
    lang.code.lower(): lang
    for lang in NLLB_LANGS
    if lang.code is not None
}
_TO_NLLB_LANG_NAME = {
    lang.name.lower(): lang
    for lang in NLLB_LANGS
    if lang.name is not None
}
_TO_NLLB_LANG_WHISPER_CODE = {
    lang.code_whisper.lower(): lang
    for lang in NLLB_LANGS
    if lang.code_whisper is not None
}
_TO_NLLB_LANG_WHISPER_NAME = {
    lang.name_whisper.lower(): lang
    for lang in NLLB_LANGS
    if lang.name_whisper is not None
}
def get_nllb_lang_from_code(lang_code, default=None) -> NllbLang:
    """Return the language registered under a code such as 'eng_Latn'.

    The lookup table stores lower-cased keys, so a string argument is
    lower-cased before the lookup. This makes matching case-insensitive and
    lets the canonical mixed-case spelling ('eng_Latn') resolve.

    Args:
        lang_code: Language code to look up; any letter case is accepted.
        default: Value returned when no language matches.

    Returns:
        The matching NllbLang, or ``default`` when there is no match.
    """
    # Non-string values (e.g. None) are passed through unchanged so they
    # simply miss the table and yield ``default``.
    key = lang_code.lower() if isinstance(lang_code, str) else lang_code
    return _TO_NLLB_LANG_CODE.get(key, default)
def get_nllb_lang_from_name(lang_name, default=None) -> NllbLang:
    """Look up a language by its name, ignoring letter case.

    Args:
        lang_name: Language name to look up (e.g. 'English'). An empty or
            None value is looked up as None.
        default: Value returned when no language matches.

    Returns:
        The matching NllbLang, or ``default`` when there is no match.
    """
    key = None
    if lang_name:
        # Table keys are lower-cased, so normalise the query the same way.
        key = lang_name.lower()
    return _TO_NLLB_LANG_NAME.get(key, default)
def get_nllb_lang_from_code_whisper(lang_code_whisper, default=None) -> NllbLang:
    """Return the language registered under a Whisper code such as 'en'.

    The lookup table stores lower-cased keys, so a string argument is
    lower-cased before the lookup; 'EN' and 'en' resolve to the same entry.

    Args:
        lang_code_whisper: Whisper language code; any letter case is accepted.
        default: Value returned when no language matches.

    Returns:
        The matching NllbLang, or ``default`` when there is no match.
    """
    # Non-string values (e.g. None) are passed through unchanged so they
    # simply miss the table and yield ``default``.
    if isinstance(lang_code_whisper, str):
        key = lang_code_whisper.lower()
    else:
        key = lang_code_whisper
    return _TO_NLLB_LANG_WHISPER_CODE.get(key, default)
def get_nllb_lang_from_name_whisper(lang_name_whisper, default=None) -> NllbLang:
    """Look up a language by its Whisper name, ignoring letter case.

    Args:
        lang_name_whisper: Whisper language name (e.g. 'English'). An empty
            or None value is looked up as None.
        default: Value returned when no language matches.

    Returns:
        The matching NllbLang, or ``default`` when there is no match.
    """
    key = None
    if lang_name_whisper:
        # Table keys are lower-cased, so normalise the query the same way.
        key = lang_name_whisper.lower()
    return _TO_NLLB_LANG_WHISPER_NAME.get(key, default)
def get_nllb_lang_names():
    """Collect the ``name`` of every entry in NLLB_LANGS, in table order.

    Returns:
        A new list holding one name per entry.
    """
    names = []
    for entry in NLLB_LANGS:
        names.append(entry.name)
    return names
if __name__ == "__main__":
    # Manual smoke check: print one lookup by code, one lookup by name,
    # and then the full list of language names.
    for lookup, query in (
        (get_nllb_lang_from_code, 'eng_Latn'),
        (get_nllb_lang_from_name, 'English'),
    ):
        print(lookup(query))
    print(get_nllb_lang_names())