Spaces:
Runtime error
Runtime error
datnth1709
committed on
Commit
•
19ac608
1
Parent(s):
812d36d
update
Browse files
- app.py +6 -18
- requirements.txt +8 -13
app.py
CHANGED
@@ -1,18 +1,15 @@
|
|
1 |
import gradio as gr
|
2 |
import nltk
|
3 |
import librosa
|
4 |
-
from
|
5 |
-
|
6 |
-
from transformers import pipeline, TranslationPipeline, AutoTokenizer, TranslationPipeline
|
7 |
-
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, Wav2Vec2Tokenizer
|
8 |
from transformers.file_utils import cached_path, hf_bucket_url
|
9 |
import os, zipfile
|
|
|
10 |
from datasets import load_dataset
|
11 |
import torch
|
12 |
import kenlm
|
13 |
import torchaudio
|
14 |
from pyctcdecode import Alphabet, BeamSearchDecoderCTC, LanguageModel
|
15 |
-
device = torch.device(0 if torch.cuda.is_available() else "cpu")
|
16 |
|
17 |
"""Vietnamese speech2text"""
|
18 |
cache_dir = './cache/'
|
@@ -126,17 +123,8 @@ def speech2text_en(input_file):
|
|
126 |
"""Machine translation"""
|
127 |
vien_model_checkpoint = "datnth1709/finetuned_HelsinkiNLP-opus-mt-vi-en_PhoMT"
|
128 |
envi_model_checkpoint = "datnth1709/finetuned_HelsinkiNLP-opus-mt-en-vi_PhoMT"
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
vien_tokenizer = AutoTokenizer.from_pretrained(vien_model_checkpoint, return_tensors="pt")
|
133 |
-
vien_model = ORTModelForSeq2SeqLM.from_pretrained(vien_model_checkpoint)
|
134 |
-
vien_translator = TranslationPipeline(model=vien_model, tokenizer=vien_tokenizer,clean_up_tokenization_spaces=True, device=device)
|
135 |
-
|
136 |
-
envi_tokenizer = AutoTokenizer.from_pretrained(envi_model_checkpoint, return_tensors="pt")
|
137 |
-
envi_model = ORTModelForSeq2SeqLM.from_pretrained(envi_model_checkpoint)
|
138 |
-
envi_translator = TranslationPipeline(model=envi_model, tokenizer=envi_tokenizer,clean_up_tokenization_spaces=True, device=device)
|
139 |
-
|
140 |
|
141 |
def translate_vi2en(Vietnamese):
|
142 |
return vien_translator(Vietnamese)[0]['translation_text']
|
@@ -188,9 +176,9 @@ def transcribe_en(audio, state_en="", state_vi=""):
|
|
188 |
transcription = eng_tokenizer.decode(predicted_ids[0])
|
189 |
# Output is all upper case
|
190 |
transcription = correct_casing(transcription.lower())
|
191 |
-
state_en += transcription + "
|
192 |
vi_text = translate_en2vi(transcription)
|
193 |
-
state_vi += vi_text + "
|
194 |
return state_en, state_vi
|
195 |
|
196 |
"""Gradio demo"""
|
|
|
1 |
import gradio as gr
|
2 |
import nltk
|
3 |
import librosa
|
4 |
+
from transformers import pipeline
|
|
|
|
|
|
|
5 |
from transformers.file_utils import cached_path, hf_bucket_url
|
6 |
import os, zipfile
|
7 |
+
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, Wav2Vec2Tokenizer
|
8 |
from datasets import load_dataset
|
9 |
import torch
|
10 |
import kenlm
|
11 |
import torchaudio
|
12 |
from pyctcdecode import Alphabet, BeamSearchDecoderCTC, LanguageModel
|
|
|
13 |
|
14 |
"""Vietnamese speech2text"""
|
15 |
cache_dir = './cache/'
|
|
|
123 |
"""Machine translation"""
|
124 |
vien_model_checkpoint = "datnth1709/finetuned_HelsinkiNLP-opus-mt-vi-en_PhoMT"
|
125 |
envi_model_checkpoint = "datnth1709/finetuned_HelsinkiNLP-opus-mt-en-vi_PhoMT"
|
126 |
+
vien_translator = pipeline("translation", model=vien_model_checkpoint)
|
127 |
+
envi_translator = pipeline("translation", model=envi_model_checkpoint)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
def translate_vi2en(Vietnamese):
|
130 |
return vien_translator(Vietnamese)[0]['translation_text']
|
|
|
176 |
transcription = eng_tokenizer.decode(predicted_ids[0])
|
177 |
# Output is all upper case
|
178 |
transcription = correct_casing(transcription.lower())
|
179 |
+
state_en += transcription + " "
|
180 |
vi_text = translate_en2vi(transcription)
|
181 |
+
state_vi += vi_text + " "
|
182 |
return state_en, state_vi
|
183 |
|
184 |
"""Gradio demo"""
|
requirements.txt
CHANGED
@@ -1,22 +1,17 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
transformers
|
4 |
-
datasets==
|
5 |
-
optimum==1.3.0
|
6 |
-
sacremoses==0.0.53
|
7 |
-
onnxruntime==1.12.1
|
8 |
-
onnx==1.12.0
|
9 |
-
optimum[graphcore]
|
10 |
-
torch
|
11 |
-
torchaudio
|
12 |
pyctcdecode==v0.1.0
|
13 |
speechbrain
|
14 |
pydub
|
15 |
kenlm
|
|
|
16 |
soundfile
|
17 |
ffmpeg-python
|
18 |
gradio
|
19 |
nltk
|
20 |
librosa
|
21 |
-
|
22 |
-
|
|
|
|
1 |
+
torch==1.9.0
|
2 |
+
torchaudio==0.9.0
|
3 |
+
transformers==4.9.2
|
4 |
+
datasets==1.11.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
pyctcdecode==v0.1.0
|
6 |
speechbrain
|
7 |
pydub
|
8 |
kenlm
|
9 |
+
pyctcdecode
|
10 |
soundfile
|
11 |
ffmpeg-python
|
12 |
gradio
|
13 |
nltk
|
14 |
librosa
|
15 |
+
transformers
|
16 |
+
transformers[sentencepiece]
|
17 |
+
https://github.com/kpu/kenlm/archive/master.zip
|