File size: 2,598 Bytes
1e090f2 ad81fab 2dd5f48 da685d1 91edc36 da685d1 91edc36 be5f920 da685d1 cf1be34 da685d1 b8c5e67 be5f920 a50abbe da685d1 180aa71 91edc36 180aa71 da685d1 f43e8e6 be5f920 70e51f5 adc53e2 da685d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import gradio as gr
import librosa
from transformers import AutoFeatureExtractor, AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
def load_and_fix_data(input_file, model_sampling_rate):
    """Load an audio file as a mono waveform at the model's sampling rate.

    Decoding, down-mixing and resampling are all delegated to a single
    ``librosa.load`` call. The previous version loaded at librosa's default
    rate first (so the audio was resampled twice), carried a stereo branch
    that could never run because ``librosa.load`` already down-mixes by
    default, and called ``librosa.resample`` with positional rates, which
    newer librosa releases reject (``orig_sr``/``target_sr`` are
    keyword-only there).

    Args:
        input_file: Path (or file-like object) accepted by ``librosa.load``.
        model_sampling_rate: Sampling rate, in Hz, expected by the ASR model.

    Returns:
        The waveform returned by ``librosa.load``: mono, resampled to
        ``model_sampling_rate``.
    """
    # sr=... resamples straight to the target rate; mono=True averages the
    # channels of multi-channel recordings into a single one.
    speech, _ = librosa.load(input_file, sr=model_sampling_rate, mono=True)
    return speech
# Hugging Face Hub ids of the two pre-trained checkpoints used by the demo.
ASR_MODEL_ID = "jonatasgrosman/wav2vec2-xls-r-1b-spanish"
TRANSLATION_MODEL_ID = 'hackathon-pln-es/t5-small-spanish-nahuatl'

# Spanish speech recognition: the feature extractor is loaded only to read
# the sampling rate the ASR checkpoint expects.
feature_extractor = AutoFeatureExtractor.from_pretrained(ASR_MODEL_ID)
sampling_rate = feature_extractor.sampling_rate
asr = pipeline("automatic-speech-recognition", model=ASR_MODEL_ID)

# Spanish -> Nahuatl translation model and its tokenizer.
model = AutoModelForSeq2SeqLM.from_pretrained(TRANSLATION_MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(TRANSLATION_MODEL_ID)

# Line break used when formatting the result string.
new_line = '\n'
def predict_and_ctc_lm_decode(input_file):
    """Transcribe Spanish speech and translate the transcript to Nahuatl.

    Args:
        input_file: Path to the recorded audio file. May be ``None`` or
            empty when the form is submitted without a recording.

    Returns:
        A string containing the Spanish transcription and, on a new line,
        the Nahuatl translation. If no audio was supplied, a short message
        asking the user to record something is returned instead.
    """
    # Without this guard a missing recording fails deep inside the audio
    # loader with an error that tells the user nothing useful.
    if not input_file:
        return "No audio input received. Please record an audio clip and try again."

    speech = load_and_fix_data(input_file, sampling_rate)

    # The recording is transcribed in 10 s chunks with a 1 s stride; the
    # pipeline result is a dict whose "text" entry holds the transcript.
    transcribed_text = asr(speech, chunk_length_s=10, stride_length_s=1)["text"]

    # The translation model is prompted with a task prefix ahead of the text.
    prompt = 'translate Spanish to Nahuatl: ' + transcribed_text
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids
    generated = model.generate(input_ids, max_length=512)
    translation = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]

    return f"Spanish Audio Transcription: {transcribed_text} {new_line} Nahuatl Translation :{translation}"
# Markdown blurb for the demo page; passed to gr.Interface as `description`.
# It explains the workflow and links to the two pre-trained checkpoints.
description = """ This is a Gradio demo of Spanish Audio Transcriptions to Nahuatl Translation. To use this, simply provide an audio input (audio recording or via microphone), which will subsequently be transcribed and translated to the Nahuatl language.
Pre-trained model used for Spanish ASR: [jonatasgrosman/wav2vec2-xls-r-1b-spanish](https://huggingface.co/jonatasgrosman/wav2vec2-xls-r-1b-spanish)
Pre-trained model used for translating Spanish audio transcription to the Nahuatl language: [hackathon-pln-es/t5-small-spanish-nahuatl](https://huggingface.co/hackathon-pln-es/t5-small-spanish-nahuatl)
"""
# Input widget: a microphone recording handed to the callback as a file path.
audio_input = gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")

# Output widget: a text box showing the transcription and translation.
text_output = gr.outputs.Textbox()

# Assemble the demo around the transcription/translation callback.
demo = gr.Interface(
    predict_and_ctc_lm_decode,
    inputs=[audio_input],
    outputs=[text_output],
    examples=[["audio1.wav"], ["travel.wav"], ["sample_audio.wav"]],
    title="Spanish-Audio-Transcriptions-to-Nahuatl-Translation",
    description=description,
    #article="<p><center><img src='........e'></center></p>",
    layout="horizontal",
    theme="huggingface",
)

# Start the app with request queueing and example caching switched on.
demo.launch(enable_queue=True, cache_examples=True)
|