Spaces:

OOI-FrontierTech
/

demo_language_moore

Running

App Files Files Community

khof312 committed on Oct 31, 2023

Commit

e5e9b34

•

1 Parent(s): 4a66b98

Initial commit of the app.

Browse files

Files changed (7) hide show

moore_app.py +98 -0
src/__init__.py +5 -0
src/helpers.py +16 -0
src/language_id.py +41 -0
src/speech_to_text.py +45 -0
src/text_to_speech.py +35 -0
src/translation.py +136 -0

moore_app.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import torch
+import scipy
+import os
+import streamlit as st
+from transformers import set_seed, pipeline
+from transformers import VitsTokenizer, VitsModel
+from datasets import load_dataset, Audio
+from IPython.display import Audio as Aud
+from src import *
+from huggingface_hub import login
+from dotenv import load_dotenv
+#load_dotenv()
+#HUGGINGFACE_KEY = os.environ.get("HUGGINGFACE_KEY")
+#login(HUGGINGFACE_KEY)
+########################
# ISO-3 language codes offered by every language selector in the app.
language_list = ["mos", "fra", "eng"]

st.title("Demo: Automated Tools for Mooré Language")

# One tab per tool; each returned container is entered with `with` further down.
tab_labels = ["Text to speech", "Speech to text", "Translation", "Language ID"]
tts, stt, trans, lid = st.tabs(tab_labels)
+########################
# --- Text-to-speech tab: type text, pick its language, listen to the synthesis.
with tts:
    tts_text = st.text_area(label="Please enter your text here:", value="", placeholder="ne y wĩndga")
    # Two equal columns; only the first is populated, so the selector takes half the width.
    tts_col1, tts_col2 = st.columns(2)
    with tts_col1:
        tts_lang = st.selectbox('Language of text', language_list, format_func=decode_iso)
    if st.button("Speak"):
        st.divider()
        if not tts_text.strip():
            # Nothing to synthesize: tell the user instead of sending empty input to the model.
            st.warning("Please enter some text to synthesize.")
        else:
            with st.spinner(":rainbow[Synthesizing, please wait...]"):
                synth = synthesize_facebook(tts_text, tts_lang)
                # NOTE(review): 16 kHz is assumed to be the output rate of the TTS checkpoints — confirm.
                st.audio(synth, sample_rate=16_000)
+########################
# --- Speech-to-text tab: upload audio, pick its language, show the transcript.
with stt:
    stt_file = st.file_uploader("Please upload an audio file:", type=['mp3', 'm4a'], key="stt_uploader")
    stt_lang = st.selectbox("Please select the language:", language_list, format_func=decode_iso)
    if st.button("Transcribe"):
        st.divider()
        if stt_file is None:
            # The uploader yields None until a file is chosen; do not hand that to the model.
            st.warning("Please upload an audio file first.")
        else:
            # The leading ':' is required for the :rainbow[...] colour directive to be applied.
            with st.spinner(":rainbow[Received your file, please wait while I process it...]"):
                # Kept in its own name so the `stt` tab container is not overwritten.
                transcript = transcribe(stt_file, stt_lang)
                st.write(":violet[The transcription is:]")
                st.write(':violet[ "' + transcript + '"]')
+########################
# --- Translation tab: type text, pick source and target languages, show the translations.
with trans:
    trans_text = st.text_area(label="Please enter your translation text here:", value="", placeholder="ne y wĩndga")
    #trans_col1, trans_col2, trans_col3 = st.columns([.25, .25, .5])
    trans_col1, trans_col2 = st.columns(2)
    with trans_col1:
        src_lang = st.selectbox('Translate from:', language_list, format_func=decode_iso)
    with trans_col2:
        # index=1 preselects the second language so source and target differ by default.
        target_lang = st.selectbox('Translate to:', language_list, format_func=decode_iso, index=1)
    #with trans_col3:
    #    trans_model = st.selectbox("Translation model:",
    #                            ("Facebook (nllb-200-distilled-600M)",
    #                             "Helsinki NLP (opus-mt-mos-en)",
    #                             "Masakhane (m2m100_418m_mos_fr_news)")
    #                           )
    if st.button("Translate"):
        st.divider()
        if not trans_text.strip():
            # Nothing to translate: tell the user instead of running the models on empty input.
            st.warning("Please enter some text to translate.")
        else:
            with st.spinner(":rainbow[Translating from " + decode_iso(src_lang) + " into " + decode_iso(target_lang) + ", please wait...]"):
                translation = translate(trans_text, src_lang, target_lang) #, trans_model)
                # Explicit write instead of relying on a bare expression being rendered.
                st.write(translation)
+########################
# --- Language-ID tab: upload audio and display the detected language.
with lid:
    langid_file = st.file_uploader("Please upload an audio file:", type=['mp3', 'm4a'], key="lid_uploader")
    if st.button("Identify"):
        st.divider()
        if langid_file is None:
            # The uploader yields None until a file is chosen; do not hand that to the model.
            st.warning("Please upload an audio file first.")
        else:
            with st.spinner(":rainbow[Received your file, please wait while I process it...]"):
                lang = identify_language(langid_file)
                # Turn the detected ISO-3 code into a readable language name.
                lang = decode_iso(lang)
                st.write(":violet[The detected language is " + lang + "]")
+# supported colors: blue, green, orange, red, violet, gray/grey, rainbow.
+# https://docs.streamlit.io/library/api-reference/text/st.markdown

src/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from .helpers import *
+from .language_id import *
+from .speech_to_text import *
+from .text_to_speech import *
+from .translation import *

src/helpers.py ADDED Viewed

	@@ -0,0 +1,16 @@

# Display name -> ISO-3 code for the languages handled by the demo.
iso_encoder = {
    "English": "eng",
    "French": "fra",
    "Moore": "mos",
}

# Reverse lookup (ISO-3 code -> display name), derived so the two tables cannot drift apart.
iso_decoder = {code: name for name, code in iso_encoder.items()}
+import pycountry
def encode_iso(lang:str)-> str:
    '''
    Look up the ISO-3 code for a language name.

    Parameters
    ----------
    lang: str
        The language name; must be one of the keys of `iso_encoder`.

    Returns
    ----------
    iso: str
        The ISO-3 code stored for that name.

    Raises
    ----------
    KeyError
        If `lang` is not a key of `iso_encoder`.
    '''
    iso = iso_encoder[lang]
    return iso
def decode_iso(iso:str)-> str:
    '''
    Takes an ISO-3 code and returns the name of the language.

    The name is looked up in pycountry first. When pycountry has no entry
    for the code, the local `iso_decoder` table is consulted, and as a last
    resort the code itself is returned, so that UI callbacks using this
    function never fail on an unknown code.

    Parameters
    ----------
    iso: str
        The ISO-3 code of the language.

    Returns
    ----------
    name: str
        The language name, or `iso` unchanged if no name is known.
    '''
    try:
        language = pycountry.languages.get(alpha_3=iso)
    except KeyError:
        # NOTE(review): some pycountry releases are believed to raise instead of
        # returning None for an unknown code — handled both ways to be safe.
        language = None
    if language is None:
        return iso_decoder.get(iso, iso)
    return language.name

src/language_id.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import librosa
+import torch
+from transformers import Wav2Vec2ForSequenceClassification, AutoFeatureExtractor
+from transformers import set_seed
def identify_language(fp:str) -> str:
    '''
    Detect the language spoken in an audio file.

    Parameters
    ----------
    fp: str
        The audio file, handed as-is to `librosa.load`.

    Returns
    ----------
    detected_lang: str
        The label of the top-scoring class, taken from the model's
        `id2label` table (the iso3 code of the detected language).
    '''
    # Fixed seed for replicable runs
    set_seed(555)

    # 256 languages is the first MMS language-ID checkpoint that contains MOS
    checkpoint = "facebook/mms-lid-256"
    feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
    classifier = Wav2Vec2ForSequenceClassification.from_pretrained(checkpoint)

    # Decode the audio, resampled to 16 kHz, and turn it into model inputs
    waveform, _ = librosa.load(fp, sr=16000)
    features = feature_extractor(waveform, sampling_rate=16_000, return_tensors="pt")

    # Score every language without tracking gradients
    with torch.no_grad():
        logits = classifier(**features).logits

    # Highest-scoring class of the first (only) item in the batch
    best_class = logits.argmax(dim=-1)[0].item()
    return classifier.config.id2label[best_class]

src/speech_to_text.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import librosa
+import torch
+from transformers import Wav2Vec2ForCTC, AutoProcessor
+from transformers import set_seed
def transcribe(fp:str, target_lang:str) -> str:
    '''
    For given audio file, transcribe it.

    Parameters
    ----------
    fp: str
        The audio file, handed as-is to `librosa.load`.
    target_lang: str
        The ISO-3 code of the language spoken in the audio. It is forwarded
        to both the processor and the model loader as `target_lang`.

    Returns
    ----------
    transcript: str
        The transcribed text.
    '''
    # Ensure replicability
    set_seed(555)

    # Load transcription model for the requested language. The caller's
    # `target_lang` is used as given; it must not be overwritten here,
    # otherwise every file is transcribed as the same language.
    model_id = "facebook/mms-1b-all"
    processor = AutoProcessor.from_pretrained(model_id, target_lang=target_lang)
    model = Wav2Vec2ForCTC.from_pretrained(model_id, target_lang=target_lang, ignore_mismatched_sizes=True)

    # Process the audio (resampled to 16 kHz on load)
    signal, sampling_rate = librosa.load(fp, sr=16000)
    inputs = processor(signal, sampling_rate=sampling_rate, return_tensors="pt")

    # Inference
    with torch.no_grad():
        outputs = model(**inputs).logits

    # Keep the highest-scoring token at each step of the first (only) item, then decode to text
    ids = torch.argmax(outputs, dim=-1)[0]
    transcript = processor.decode(ids)
    return transcript

src/text_to_speech.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import torch
+from transformers import set_seed
+from transformers import VitsTokenizer, VitsModel
+from IPython.display import Audio as Aud
def synthesize_facebook(s:str, iso3:str) -> "numpy.ndarray":
    '''
    For given text, speak it.

    Parameters
    ----------
    s: str
        The written text.
    iso3: str
        The ISO-3 code of the text's language; it selects the
        `facebook/mms-tts-{iso3}` checkpoint.

    Returns
    ----------
    synth: numpy.ndarray
        The synthesized audio: the first waveform of the model output,
        converted to a NumPy array.
    '''
    # Ensure replicability, consistent with the other model wrappers.
    # NOTE(review): VITS synthesis is documented as non-deterministic unless seeded.
    set_seed(555)

    # Load synthesizer
    model_id = f"facebook/mms-tts-{iso3}"
    tokenizer = VitsTokenizer.from_pretrained(model_id)
    model = VitsModel.from_pretrained(model_id)

    inputs = tokenizer(text=s, return_tensors="pt")

    # Inference
    with torch.no_grad():
        outputs = model(**inputs)

    synth = outputs.waveform[0]
    return synth.numpy()

src/translation.py ADDED Viewed

	@@ -0,0 +1,136 @@

+import torch
+from transformers import set_seed, pipeline
+from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+######### HELSINKI NLP ##################
def translate_helsinki_nlp(s:str, src_iso:str, dest_iso:str)-> str:
    '''
    Translate text with one of HelsinkiNLP's Opus models for Mossi language.

    Parameters
    ----------
    s: str
        The text
    src_iso:
        The source language code, as it appears in the checkpoint name
    dest_iso:
        The destination language code, as it appears in the checkpoint name

    Returns
    ----------
    translation:str
        The translated text
    '''
    # Fixed seed for replicable runs
    set_seed(555)

    # The language pair picks the checkpoint
    checkpoint = f"Helsinki-NLP/opus-mt-{src_iso}-{dest_iso}"
    opus = pipeline("translation", model=checkpoint)

    # The pipeline returns one result dict per input; only one text is sent
    results = opus(s)
    return results[0]['translation_text']
+######### MASAKHANE ##################
def translate_masakhane(s:str, src_iso:str, dest_iso:str)-> str:
    '''
    Translate text with one of Masakhane's M2M models for Mossi language.

    Parameters
    ----------
    s: str
        The text
    src_iso:
        The source language code, as it appears in the checkpoint name
    dest_iso:
        The destination language code, as it appears in the checkpoint name

    Returns
    ----------
    translation:str
        The translated text
    '''
    # Fixed seed for replicable runs
    set_seed(555)

    # The language pair picks the checkpoint; model and tokenizer share it
    checkpoint = f"masakhane/m2m100_418m_{src_iso}_{dest_iso}_news"
    model = M2M100ForConditionalGeneration.from_pretrained(checkpoint)
    tokenizer = M2M100Tokenizer.from_pretrained(checkpoint)

    # Tokenize, generate, and decode the single input text
    batch = tokenizer(s, return_tensors="pt")
    output_ids = model.generate(**batch)
    decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return decoded[0]
+######### META ##################
def translate_facebook(s:str, src_iso:str, dest_iso:str, max_length:int=30)-> str:
    '''
    Translate the text using Meta's NLLB model for Mossi language.

    Parameters
    ----------
    s: str
        The text
    src_iso:
        The ISO-3 code of the source language
    dest_iso:
        The ISO-3 code of the destination language
    max_length: int
        Upper bound passed to `generate` for the length of the output;
        longer translations are cut off. Defaults to 30.

    Returns
    ----------
    translation:str
        The translated text

    Raises
    ----------
    KeyError
        If `{dest_iso}_Latn` is not a language token known to the tokenizer.
    '''
    # Ensure replicability
    set_seed(555)

    # Load model. The checkpoint is loaded without forcing an access token,
    # so this works whether or not a Hugging Face login has been performed.
    model_id = "facebook/nllb-200-distilled-600M"
    tokenizer = AutoTokenizer.from_pretrained(model_id, src_lang=f"{src_iso}_Latn")
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

    # Resolve the target-language token through the generic vocabulary lookup
    # rather than the tokenizer's `lang_code_to_id` table.
    dest_token = f"{dest_iso}_Latn"
    dest_token_id = tokenizer.convert_tokens_to_ids(dest_token)
    if dest_token_id == tokenizer.unk_token_id:
        # An unknown code would otherwise silently map to the unknown token.
        raise KeyError(dest_token)

    # Inference
    encoded = tokenizer(s, return_tensors="pt")
    translated_tokens = model.generate(**encoded, forced_bos_token_id=dest_token_id, max_length=max_length)
    translation = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
    return translation
+######### ALL OF THE ABOVE ##################
def translate(s, src_iso, dest_iso):
    '''
    Translate the text with every model that supports the language pair
    (Meta always; Helsinki NLP and Masakhane where applicable).

    Parameters
    ----------
    s: str
        The text
    src_iso:
        The ISO-3 code of the source language
    dest_iso:
        The ISO-3 code of the destination language

    Returns
    ----------
    translation:str
        The translated text, concatenated over different models
    '''
    # Language pairs each optional model is run for, mapped to the codes
    # passed to that model ("eng" -> "en", "fra" -> "fr").
    opus_pairs = {
        "mos-eng": ("mos", "en"),
        "eng-mos": ("en", "mos"),
        "fra-mos": ("fr", "mos"),
    }
    m2m_pairs = {
        "mos-fra": ("mos", "fr"),
        "fra-mos": ("fr", "mos"),
    }

    # Meta NLLB is always run, with the ISO-3 codes as given
    sections = ["Meta's NLLB translation is:\n\n" + translate_facebook(s, src_iso, dest_iso)]

    iso_pair = f"{src_iso}-{dest_iso}"

    # Add the Helsinki NLP output when the pair is covered
    if iso_pair in opus_pairs:
        opus_src, opus_dest = opus_pairs[iso_pair]
        sections.append(f"\n\n\nHelsinkiNLP's Opus translation is:\n\n {translate_helsinki_nlp(s, opus_src, opus_dest)}")

    # Add the Masakhane output when the pair is covered
    if iso_pair in m2m_pairs:
        m2m_src, m2m_dest = m2m_pairs[iso_pair]
        sections.append("\n\n\nMasakhane's M2M translation is:\n\n" + translate_masakhane(s, m2m_src, m2m_dest))

    return "".join(sections)