DrishtiSharma
committed on
Commit
•
44d4c43
1
Parent(s):
b9e9c6d
Update app.py
Browse files
app.py
CHANGED
@@ -19,8 +19,9 @@ asr = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-la
|
|
19 |
|
20 |
|
21 |
|
22 |
-
|
23 |
-
|
|
|
24 |
|
25 |
new_line = '\n'
|
26 |
|
@@ -28,9 +29,9 @@ def predict_and_ctc_lm_decode(input_file):
|
|
28 |
speech = load_and_fix_data(input_file, sampling_rate)
|
29 |
transcribed_text = asr(speech, chunk_length_s=5, stride_length_s=1)
|
30 |
transcribed_text = transcribed_text["text"]
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
return f"Spanish Audio Transcription: {transcribed_text} {new_line} Nahuatl Translation :{outputs}"
|
35 |
|
36 |
|
|
|
19 |
|
20 |
|
21 |
|
22 |
+
model_name = 'hackathon-pln-es/t5-small-finetuned-spanish-to-quechua'
|
23 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
24 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
25 |
|
26 |
new_line = '\n'
|
27 |
|
|
|
29 |
speech = load_and_fix_data(input_file, sampling_rate)
|
30 |
transcribed_text = asr(speech, chunk_length_s=5, stride_length_s=1)
|
31 |
transcribed_text = transcribed_text["text"]
|
32 |
+
input = tokenizer(transcribed_text, return_tensors="pt")
|
33 |
+
output = model.generate(input["input_ids"], max_length=40, num_beams=4, early_stopping=True)
|
34 |
+
output = tokenizer.decode(output[0], skip_special_tokens=True)
|
35 |
return f"Spanish Audio Transcription: {transcribed_text} {new_line} Nahuatl Translation :{outputs}"
|
36 |
|
37 |
|