# Uploaded by vasudevgupta -- commit 71c9f80 "Update app.py" (817 Bytes)
import gradio as gr
import tensorflow as tf
from wav2vec2 import Wav2Vec2Processor, Wav2Vec2ForCTC
if __name__ == '__main__':
    # The same project class is instantiated twice: once to prepare raw audio
    # for the model and once (is_tokenizer=True) to decode predicted ids.
    processor = Wav2Vec2Processor(is_tokenizer=False)
    tokenizer = Wav2Vec2Processor(is_tokenizer=True)
    model = Wav2Vec2ForCTC.from_pretrained("vasudevgupta/gsoc-wav2vec2-960h")

    def _forward(speech: tf.Tensor) -> tf.Tensor:
        """Run the model on one waveform and return the argmax ids.

        The processed waveform gets a leading axis via ``[None]`` before it is
        passed to the model with ``training=False``. The argmax is taken over
        the last axis of the model output and size-1 axes are squeezed away.
        """
        speech = processor(speech)[None]
        tf_out = model(speech, training=False)
        return tf.squeeze(tf.argmax(tf_out, axis=-1))

    def transcribe_text(inputs):
        """Gradio callback: convert the audio component's value to text.

        Args:
            inputs: A pair whose second item holds the waveform samples, or
                ``None`` when nothing was recorded/uploaded.

        Returns:
            The decoded transcription, or an empty string when ``inputs`` is
            ``None``.
        """
        # Without this guard the tuple unpacking below raises TypeError when
        # the form is submitted with no audio attached.
        if inputs is None:
            return ""

        # NOTE(review): the first item is presumably the sample rate (Gradio's
        # numpy audio format). It is ignored, so no resampling happens --
        # confirm the rate the checkpoint expects.
        _, speech = inputs
        speech = tf.constant(speech, dtype=tf.float32)
        # No-op for 1-D input; for 2-D input this swaps the two axes.
        # NOTE(review): presumably meant for (samples, channels) audio --
        # verify that multi-channel input is actually handled downstream.
        speech = tf.transpose(speech)
        tf_out = _forward(speech)
        return tokenizer.decode(tf_out.numpy().tolist())

    gr.Interface(fn=transcribe_text, inputs="audio", outputs="text").launch()