khof312 committed on
Commit
e5e9b34
1 Parent(s): 4a66b98

Initial commit of the app.

Browse files
moore_app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import scipy
3
+ import os
4
+ import streamlit as st
5
+ from transformers import set_seed, pipeline
6
+ from transformers import VitsTokenizer, VitsModel
7
+ from datasets import load_dataset, Audio
8
+ from IPython.display import Audio as Aud
9
+ from src import *
10
+
11
+ from huggingface_hub import login
12
+ from dotenv import load_dotenv
13
+
14
+ #load_dotenv()
15
+ #HUGGINGFACE_KEY = os.environ.get("HUGGINGFACE_KEY")
16
+ #login(HUGGINGFACE_KEY)
17
+
18
+
19
+ ########################
20
# ISO 639-3 codes of the languages offered in every language selector.
language_list = ['mos', 'fra', 'eng']


st.title("Demo: Automated Tools for Mooré Language")
tts, stt, trans, lid = st.tabs(["Text to speech", "Speech to text", "Translation", "Language ID"])

########################
# Text to speech
with tts:

    tts_text = st.text_area(label="Please enter your text here:", value="", placeholder="ne y wĩndga")

    tts_col1, tts_col2 = st.columns(2)

    with tts_col1:
        tts_lang = st.selectbox('Language of text', language_list, format_func=decode_iso)

    if st.button("Speak"):
        st.divider()
        if not tts_text.strip():
            # Nothing to synthesize: tell the user instead of calling the model.
            st.warning("Please enter some text first.")
        else:
            with st.spinner(":rainbow[Synthesizing, please wait...]"):
                synth = synthesize_facebook(tts_text, tts_lang)
            st.audio(synth, sample_rate=16_000)

########################
# Speech to text
with stt:

    stt_file = st.file_uploader("Please upload an audio file:", type=['mp3', 'm4a'], key="stt_uploader")
    stt_lang = st.selectbox("Please select the language:", language_list, format_func=decode_iso)

    if st.button("Transcribe"):
        st.divider()
        if stt_file is None:
            # The uploader yields None until a file has been chosen.
            st.warning("Please upload an audio file first.")
        else:
            with st.spinner(":rainbow[Received your file, please wait while I process it...]"):
                # Use a dedicated name so the `stt` tab container is not rebound.
                transcript = transcribe(stt_file, stt_lang)
            st.write(":violet[The transcription is:]")
            st.write(':violet[ "' + transcript + '"]')

########################
# Translation
with trans:

    trans_text = st.text_area(label="Please enter your translation text here:", value="", placeholder="ne y wĩndga")
    trans_col1, trans_col2 = st.columns(2)

    with trans_col1:
        src_lang = st.selectbox('Translate from:', language_list, format_func=decode_iso)
    with trans_col2:
        target_lang = st.selectbox('Translate to:', language_list, format_func=decode_iso, index=1)

    if st.button("Translate"):
        st.divider()
        if not trans_text.strip():
            st.warning("Please enter some text first.")
        else:
            with st.spinner(":rainbow[Translating from " + decode_iso(src_lang) + " into " + decode_iso(target_lang) + ", please wait...]"):
                translation = translate(trans_text, src_lang, target_lang)
            st.write(translation)

########################
# Language identification
with lid:
    langid_file = st.file_uploader("Please upload an audio file:", type=['mp3', 'm4a'], key="lid_uploader")

    if st.button("Identify"):
        st.divider()
        if langid_file is None:
            st.warning("Please upload an audio file first.")
        else:
            with st.spinner(":rainbow[Received your file, please wait while I process it...]"):
                lang = identify_language(langid_file)
                lang = decode_iso(lang)
            st.write(":violet[The detected language is " + lang + "]")
93
+
94
+
95
+ # supported colors: blue, green, orange, red, violet, gray/grey, rainbow.
96
+ # https://docs.streamlit.io/library/api-reference/text/st.markdown
97
+
98
+
src/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from .helpers import *
2
+ from .language_id import *
3
+ from .speech_to_text import *
4
+ from .text_to_speech import *
5
+ from .translation import *
src/helpers.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Language display names mapped to their ISO 639-3 codes.
iso_encoder = {
    "English": "eng",
    "French": "fra",
    "Moore": "mos",
}

# Reverse lookup of `iso_encoder`: ISO 639-3 code -> display name.
iso_decoder = {code: name for name, code in iso_encoder.items()}
7
+
8
+ import pycountry
9
+
10
def encode_iso(lang:str)-> str:
    '''
    Look up the ISO-3 code stored for a language name.

    Parameters
    ----------
    lang: str
        The language name; must be one of the keys of `iso_encoder`.

    Returns
    ----------
    code:str
        The ISO-3 code registered for that name.

    Raises
    ----------
    KeyError
        If `lang` is not a key of `iso_encoder`.
    '''
    code = iso_encoder[lang]
    return code
13
+
14
def decode_iso(iso:str)-> str:
    '''
    Takes an ISO-3 code and returns the name of the language.

    Parameters
    ----------
    iso: str
        The ISO-3 code to look up.

    Returns
    ----------
    name:str
        The language name reported by pycountry. When pycountry has no entry
        for the code, the name from `iso_decoder` is used, and if that is
        missing too the code itself is returned unchanged.
    '''
    language = pycountry.languages.get(alpha_3 = iso)
    if language is None:
        # Unknown to pycountry: degrade gracefully instead of failing on `.name`.
        return iso_decoder.get(iso, iso)
    return language.name
src/language_id.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import librosa
3
+ import torch
4
+ from transformers import Wav2Vec2ForSequenceClassification, AutoFeatureExtractor
5
+ from transformers import set_seed
6
+
7
def identify_language(fp:str) -> str:
    '''
    For given audio file, identify what language it uses.

    Parameters
    ----------
    fp: str
        The audio to analyse. It is handed straight to `librosa.load`, so it
        may be a file path; the app also passes an uploaded file object here.

    Returns
    ----------
    detected_lang:str
        The label the model assigns to the highest-scoring class
        (the iso3 code of the detected language).

    Raises
    ----------
    ValueError
        If `fp` is None, i.e. no audio was supplied.
    '''
    if fp is None:
        raise ValueError("No audio file was provided for language identification.")

    # Ensure replicability
    set_seed(555)

    # Load language ID model
    # 256 languages is the first model size that contains MOS
    model_id = "facebook/mms-lid-256"
    processor = AutoFeatureExtractor.from_pretrained(model_id)
    model = Wav2Vec2ForSequenceClassification.from_pretrained(model_id)

    # Process the audio: resample to 16 kHz while loading and pass that same
    # rate on to the feature extractor.
    signal, sampling_rate = librosa.load(fp, sr=16_000)
    inputs = processor(signal, sampling_rate=sampling_rate, return_tensors="pt")

    # Inference
    with torch.no_grad():
        logits = model(**inputs).logits

    # Highest-scoring class of the first (only) item in the batch
    lang_id = torch.argmax(logits, dim=-1)[0].item()
    detected_lang = model.config.id2label[lang_id]

    return detected_lang
src/speech_to_text.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import librosa
3
+ import torch
4
+ from transformers import Wav2Vec2ForCTC, AutoProcessor
5
+ from transformers import set_seed
6
+
7
+
8
def transcribe(fp:str, target_lang:str = "mos") -> str:
    '''
    For given audio file, transcribe it.

    Parameters
    ----------
    fp: str
        The audio to transcribe. It is handed straight to `librosa.load`, so
        it may be a file path; the app also passes an uploaded file object.
    target_lang:str
        The ISO-3 code of the target language. It is forwarded to both the
        processor and the model loader. Defaults to "mos".

    Returns
    ----------
    transcript:str
        The transcribed text.

    Raises
    ----------
    ValueError
        If `fp` is None, i.e. no audio was supplied.
    '''
    if fp is None:
        raise ValueError("No audio file was provided for transcription.")

    # Ensure replicability
    set_seed(555)

    # Load transcription model for the requested language. The language is
    # taken from the caller rather than being forced to "mos".
    model_id = "facebook/mms-1b-all"
    processor = AutoProcessor.from_pretrained(model_id, target_lang=target_lang)
    # NOTE(review): ignore_mismatched_sizes presumably accommodates the
    # language-specific output layer size -- confirm against the MMS docs.
    model = Wav2Vec2ForCTC.from_pretrained(model_id, target_lang=target_lang, ignore_mismatched_sizes=True)

    # Process the audio: resample to 16 kHz while loading and pass that same
    # rate on to the processor.
    signal, sampling_rate = librosa.load(fp, sr=16_000)
    inputs = processor(signal, sampling_rate=sampling_rate, return_tensors="pt")

    # Inference
    with torch.no_grad():
        logits = model(**inputs).logits

    # Most likely token per frame for the first (only) item in the batch
    ids = torch.argmax(logits, dim=-1)[0]
    transcript = processor.decode(ids)

    return transcript
src/text_to_speech.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ from transformers import set_seed
4
+ from transformers import VitsTokenizer, VitsModel
5
+ from IPython.display import Audio as Aud
6
+
7
def synthesize_facebook(s:str, iso3:str) -> "numpy.ndarray":
    '''
    For given text, speak it.

    Parameters
    ----------
    s: str
        The written text.
    iso3:str
        The ISO-3 code of the text's language; it selects the
        `facebook/mms-tts-{iso3}` checkpoint.

    Returns
    ----------
    synth:numpy.ndarray
        The synthesized audio: the first waveform of the model output,
        converted to a NumPy array.

    Raises
    ----------
    ValueError
        If `s` is empty or contains only whitespace.
    '''
    if not s or not s.strip():
        raise ValueError("Cannot synthesize speech from empty text.")

    # Ensure replicability (same seed as the other model wrappers)
    set_seed(555)

    # Load synthesizer
    model_id = f"facebook/mms-tts-{iso3}"
    tokenizer = VitsTokenizer.from_pretrained(model_id)
    model = VitsModel.from_pretrained(model_id)

    inputs = tokenizer(text=s, return_tensors="pt")

    # Inference
    with torch.no_grad():
        outputs = model(**inputs)

    # Single input text, so keep only the first waveform of the batch
    synth = outputs.waveform[0]

    return synth.numpy()
src/translation.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import set_seed, pipeline
3
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
4
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
5
+
6
+ ######### HELSINKI NLP ##################
7
def translate_helsinki_nlp(s:str, src_iso:str, dest_iso:str)-> str:
    '''
    Translate text with one of HelsinkiNLP's Opus models.

    Parameters
    ----------
    s: str
        The text to translate
    src_iso:
        The source language code as it appears in the Opus model name
    dest_iso:
        The destination language code as it appears in the Opus model name

    Returns
    ----------
    translation:str
        The `translation_text` of the first pipeline result
    '''
    # Fixed seed so repeated calls are replicable
    set_seed(555)

    # Build the translation pipeline for this language pair and run it
    model_name = f"Helsinki-NLP/opus-mt-{src_iso}-{dest_iso}"
    translator = pipeline("translation", model=model_name)
    results = translator(s)
    first_result = results[0]

    return first_result['translation_text']
33
+
34
+ ######### MASAKHANE ##################
35
def translate_masakhane(s:str, src_iso:str, dest_iso:str)-> str:
    '''
    Translate text with one of Masakhane's M2M news models.

    Parameters
    ----------
    s: str
        The text to translate
    src_iso:
        The source language code as it appears in the model name
    dest_iso:
        The destination language code as it appears in the model name

    Returns
    ----------
    translation:str
        The first decoded sequence, with special tokens removed
    '''
    # Fixed seed so repeated calls are replicable
    set_seed(555)

    # One checkpoint per language pair; model and tokenizer share its name
    checkpoint = f"masakhane/m2m100_418m_{src_iso}_{dest_iso}_news"
    model = M2M100ForConditionalGeneration.from_pretrained(checkpoint)
    tokenizer = M2M100Tokenizer.from_pretrained(checkpoint)

    # Tokenize, generate, and decode the single input sentence
    model_inputs = tokenizer(s, return_tensors="pt")
    output_ids = model.generate(**model_inputs)
    decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)

    return decoded[0]
66
+
67
+ ######### META ##################
68
def translate_facebook(s:str, src_iso:str, dest_iso:str, max_length:int = 30)-> str:
    '''
    Translate the text using Meta's NLLB model for Mossi language.

    Parameters
    ----------
    s: str
        The text
    src_iso:
        The ISO-3 code of the source language
    dest_iso:
        The ISO-3 code of the destination language
    max_length: int
        The `max_length` passed to `model.generate`. Defaults to 30, the
        value that was previously hard-coded; raise it for longer inputs.

    Returns
    ----------
    translation:str
        The translated text

    Raises
    ----------
    KeyError
        If `{dest_iso}_Latn` is not a language code known to the tokenizer.
    '''
    # Ensure replicability
    set_seed(555)

    # Load model. No auth token is requested: the app's login step is
    # commented out, and demanding a token fails when none is stored.
    model_id = "facebook/nllb-200-distilled-600M"
    tokenizer = AutoTokenizer.from_pretrained(model_id, src_lang=f"{src_iso}_Latn")
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

    # Resolve the target-language token that forces the output language.
    # convert_tokens_to_ids replaces the deprecated `lang_code_to_id` mapping;
    # unknown codes come back as the unknown-token id, so reject those.
    dest_code = f"{dest_iso}_Latn"
    dest_token_id = tokenizer.convert_tokens_to_ids(dest_code)
    if dest_token_id is None or dest_token_id == tokenizer.unk_token_id:
        raise KeyError(dest_code)

    # Inference
    encoded = tokenizer(s, return_tensors="pt")
    translated_tokens = model.generate(**encoded, forced_bos_token_id=dest_token_id, max_length=max_length)
    translation = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]

    return translation
99
+
100
+
101
+ ######### ALL OF THE ABOVE ##################
102
def translate(s, src_iso, dest_iso):
    '''
    Run the text through every model that supports the language pair and
    join the labelled results (Meta always; Helsinki NLP and Masakhane
    only for the pairs they cover).

    Parameters
    ----------
    s: str
        The text
    src_iso:
        The ISO-3 code of the source language
    dest_iso:
        The ISO-3 code of the destination language

    Returns
    ----------
    translation:str
        The translated text, concatenated over different models
    '''
    # Helsinki NLP and Masakhane model names use two-letter codes for
    # English and French; every other code is passed through as-is.
    short_codes = {"eng": "en", "fra": "fr"}
    pair = (src_iso, dest_iso)
    src_short = short_codes.get(src_iso, src_iso)
    dest_short = short_codes.get(dest_iso, dest_iso)

    # Meta's NLLB handles every pair, so it always comes first
    sections = ["Meta's NLLB translation is:\n\n" + translate_facebook(s, src_iso, dest_iso)]

    # Pairs covered by a HelsinkiNLP Opus model
    if pair in (("mos", "eng"), ("eng", "mos"), ("fra", "mos")):
        sections.append(f"\n\n\nHelsinkiNLP's Opus translation is:\n\n {translate_helsinki_nlp(s, src_short, dest_short)}")

    # Pairs covered by a Masakhane M2M model
    if pair in (("mos", "fra"), ("fra", "mos")):
        sections.append("\n\n\nMasakhane's M2M translation is:\n\n" + translate_masakhane(s, src_short, dest_short))

    return "".join(sections)
136
+