DrishtiSharma committed on
Commit
44d4c43
1 Parent(s): b9e9c6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -19,8 +19,9 @@ asr = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-la
19
 
20
 
21
 
22
- model = AutoModelForSeq2SeqLM.from_pretrained('hackathon-pln-es/t5-small-spanish-nahuatl')
23
- tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/t5-small-spanish-nahuatl')
 
24
 
25
  new_line = '\n'
26
 
@@ -28,9 +29,9 @@ def predict_and_ctc_lm_decode(input_file):
28
  speech = load_and_fix_data(input_file, sampling_rate)
29
  transcribed_text = asr(speech, chunk_length_s=5, stride_length_s=1)
30
  transcribed_text = transcribed_text["text"]
31
- input_ids = tokenizer('translate Spanish to Nahuatl: ' + transcribed_text, return_tensors='pt').input_ids
32
- outputs = model.generate(input_ids, max_length=512)
33
- outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
34
  return f"Spanish Audio Transcription: {transcribed_text} {new_line} Nahuatl Translation :{outputs}"
35
 
36
 
 
19
 
20
 
21
 
22
+ model_name = 'hackathon-pln-es/t5-small-finetuned-spanish-to-quechua'
23
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
24
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
25
 
26
  new_line = '\n'
27
 
 
29
  speech = load_and_fix_data(input_file, sampling_rate)
30
  transcribed_text = asr(speech, chunk_length_s=5, stride_length_s=1)
31
  transcribed_text = transcribed_text["text"]
32
+ input = tokenizer(transcribed_text, return_tensors="pt")
33
+ output = model.generate(input["input_ids"], max_length=40, num_beams=4, early_stopping=True)
34
+ output = tokenizer.decode(output[0], skip_special_tokens=True)
35
  return f"Spanish Audio Transcription: {transcribed_text} {new_line} Nahuatl Translation :{outputs}"
36
 
37