Spaces:

Detomo
/

audio-stream-translate

Build error

App Files Files Community

audio-stream-translate / app.py

vumichien

Update app.py

8f16942 about 2 years ago

raw

history blame contribute delete

1.96 kB

	import gradio as gr
	import librosa
	from optimum.onnxruntime import ORTModelForSeq2SeqLM
	from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
	import torch

	# Speech recognition: the Wav2Vec2 processor and CTC model are loaded from
	# the same checkpoint so feature extraction and vocabulary stay in sync.
	ASR_CHECKPOINT = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
	processor = Wav2Vec2Processor.from_pretrained(ASR_CHECKPOINT)
	model = Wav2Vec2ForCTC.from_pretrained(ASR_CHECKPOINT)

	# Machine translation: tokenizer and ONNX Runtime seq2seq model, likewise
	# loaded from one shared checkpoint.
	MT_CHECKPOINT = "icon-it-tdtu/mt-en-vi-optimum"
	tokenizer = AutoTokenizer.from_pretrained(MT_CHECKPOINT)
	model_lm = ORTModelForSeq2SeqLM.from_pretrained(MT_CHECKPOINT)

	def process_audio_file(file):
	    """Load an audio file and turn it into inputs for the speech model.

	    Args:
	        file: Path to an audio file that ``librosa.load`` can read.

	    Returns:
	        The result of calling ``processor`` on the waveform with
	        ``return_tensors="pt"``; callers read ``input_values`` from it.
	    """
	    target_sr = 16000  # rate passed to the processor as ``sampling_rate``

	    # Ask librosa to resample while loading. The earlier version loaded at
	    # librosa's default rate (22050 Hz) and then called
	    # ``librosa.resample(data, sr, 16000)`` positionally, which raises a
	    # TypeError on librosa >= 0.10 (``orig_sr``/``target_sr`` are
	    # keyword-only there) and also resampled the signal twice.
	    data, _ = librosa.load(file, sr=target_sr)

	    return processor(
	        data,
	        sampling_rate=target_sr,
	        return_tensors="pt",
	        padding=True,
	    )


	def transcribe(file, state=""):
	    """Transcribe one audio chunk, translate it, and extend the transcript.

	    Args:
	        file: Path to the recorded audio chunk, or ``None`` when no audio
	            was delivered for this call.
	        state: Translated text accumulated by earlier calls. ``None`` is
	            treated as an empty transcript.

	    Returns:
	        A ``(state, state)`` tuple: the updated transcript twice, once for
	        the textbox output and once for the state output of the interface.
	    """
	    # A missing state must not break the ``+=`` below.
	    state = state or ""

	    # Nothing to transcribe: leave the transcript untouched instead of
	    # letting the audio loader fail on ``None``.
	    if file is None:
	        return state, state

	    inputs = process_audio_file(file)
	    with torch.no_grad():  # inference only, no gradients needed
	        logits = model(inputs.input_values).logits
	    pred_ids = torch.argmax(logits, dim=-1)
	    text = processor.batch_decode(pred_ids)[0].lower()
	    print(text)

	    # A chunk that decodes to no text has nothing to translate; skip it so
	    # the translator is not run on an empty prompt.
	    if not text.strip():
	        return state, state

	    state += translate(text) + " "
	    return state, state


	def translate(text):
	    """Translate a single string with the seq2seq translation model.

	    Args:
	        text: Source text to translate.

	    Returns:
	        The first decoded sequence generated for ``text``, with special
	        tokens removed.
	    """
	    # The tokenizer is given a one-element batch, so exactly one decoded
	    # string comes back.
	    encoded = tokenizer([text], return_tensors="pt")
	    output_ids = model_lm.generate(**encoded)
	    decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
	    return decoded[0]


	# Build the streaming UI. Microphone audio is handed to ``transcribe`` as a
	# file path, and the "state" input/output pair carries the accumulated
	# transcript between calls (it starts from the ``state=""`` default of
	# ``transcribe``).
	microphone = gr.Audio(source="microphone", type="filepath", streaming=True)

	iface = gr.Interface(
	    fn=transcribe,
	    title="Streaming interpret English to Vietnamese",
	    description="A simple interface to streaming interpret from spoken English to Vietnamese.",
	    article="Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>.",
	    inputs=[microphone, "state"],
	    outputs=["textbox", "state"],
	)

	# Start the app with request queuing and debug mode enabled.
	iface.launch(enable_queue=True, debug=True)