Spaces:

khof312
/

demo_language_moore

Running

demo_language_moore / src /text_to_speech.py

Time execution and fix small bug in STT.

a84c313 12 months ago

935 Bytes

	import time
	import torch
	from transformers import set_seed
	from transformers import VitsTokenizer, VitsModel

	def synthesize_facebook(s: str, iso3: str) -> "numpy.ndarray":
	    """
	    Synthesize speech for the given text with a Facebook MMS-TTS model.

	    The tokenizer and model are loaded from the ``facebook/mms-tts-{iso3}``
	    checkpoint on every call, the text is tokenized, and a single forward
	    pass is run with gradient tracking disabled. The elapsed time
	    (model loading included) is printed to stdout before returning.

	    Parameters
	    ----------
	    s: str
	        The written text.
	    iso3: str
	        The ISO-3 code of the text's language; it selects the checkpoint
	        ``facebook/mms-tts-{iso3}``.

	    Returns
	    ----------
	    synth: numpy.ndarray
	        The synthesized audio: the first entry of the model's ``waveform``
	        output, converted to a NumPy array.
	        NOTE(review): presumably a 1-D array of audio samples -- confirm
	        the shape and sampling rate against the model configuration.
	    """

	    # Ensure replicability: fix the random seed before running the model.
	    set_seed(555)
	    # perf_counter is monotonic, so the measured interval cannot be skewed
	    # by system clock adjustments the way time.time() can.
	    start_time = time.perf_counter()

	    # Load synthesizer (tokenizer and model come from the same checkpoint).
	    checkpoint = f"facebook/mms-tts-{iso3}"
	    tokenizer = VitsTokenizer.from_pretrained(checkpoint)
	    model = VitsModel.from_pretrained(checkpoint)

	    inputs = tokenizer(text=s, return_tensors="pt")

	    # Inference only: no gradients are needed.
	    with torch.no_grad():
	        outputs = model(**inputs)

	    synth = outputs.waveform[0]

	    print("Time elapsed: ", int(time.perf_counter() - start_time), " seconds")
	    return synth.numpy()