DrishtiSharma
committed on
Commit
•
cf1be34
1
Parent(s):
4fdac5b
Update app.py
Browse files
app.py
CHANGED
@@ -22,6 +22,7 @@ asr = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-la
|
|
22 |
model = AutoModelForSeq2SeqLM.from_pretrained('hackathon-pln-es/t5-small-spanish-nahuatl')
|
23 |
tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/t5-small-spanish-nahuatl')
|
24 |
|
|
|
25 |
|
26 |
def predict_and_ctc_lm_decode(input_file):
|
27 |
speech = load_and_fix_data(input_file, sampling_rate)
|
@@ -30,7 +31,7 @@ def predict_and_ctc_lm_decode(input_file):
|
|
30 |
input_ids = tokenizer('translate Spanish to Nahuatl: ' + transcribed_text, return_tensors='pt').input_ids
|
31 |
outputs = model.generate(input_ids, max_length=512)
|
32 |
outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
33 |
-
return f"Spanish Audio Transcription: {transcribed_text}
|
34 |
|
35 |
|
36 |
gr.Interface(
|
|
|
22 |
model = AutoModelForSeq2SeqLM.from_pretrained('hackathon-pln-es/t5-small-spanish-nahuatl')
|
23 |
tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/t5-small-spanish-nahuatl')
|
24 |
|
25 |
+
new_line = '\n'
|
26 |
|
27 |
def predict_and_ctc_lm_decode(input_file):
|
28 |
speech = load_and_fix_data(input_file, sampling_rate)
|
|
|
31 |
input_ids = tokenizer('translate Spanish to Nahuatl: ' + transcribed_text, return_tensors='pt').input_ids
|
32 |
outputs = model.generate(input_ids, max_length=512)
|
33 |
outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
34 |
+
return f"Spanish Audio Transcription: {transcribed_text} {new_line} The corresponding Nahuatl Translation is :{outputs}"
|
35 |
|
36 |
|
37 |
gr.Interface(
|