import os
import subprocess

import torch
from scipy.io import wavfile
from transformers import pipeline
from TTS.api import TTS

# from huggingface_hub.inference_api import InferenceApi  # only needed for the inference-API variant sketched in synth_mms

def synth_mms(text: str, model: str):
    '''
    Use the Hugging Face inference pipeline to synthesize text.
    (Can be replaced by the Inference API, but that requires a stored API token.)

    Inputs:
        text:  Text to synthesize
        model: Model code of the form mms-tts-LAN
    Returns:
        Synthesized audio as a NumPy array and the sampling rate.
    '''
    # Inference API variant (requires a stored API_TOKEN):
    # inference = InferenceApi(repo_id=f"facebook/{model}", token=API_TOKEN)
    # mms_tts = inference(inputs=text, raw_response=True)._content

    if model is not None:
        # MMS checkpoints live under the facebook/ namespace on the Hub.
        # device=-1 forces CPU; pass a GPU index (e.g. 0) to run on CUDA.
        pipe = pipeline("text-to-speech", model=f"facebook/{model}", device=-1)
        mms_tts = pipe(text)
        return mms_tts['audio'], mms_tts['sampling_rate']
    else:
        return None
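
# Example usage (a minimal sketch; assumes the English MMS checkpoint
# "mms-tts-eng", which lives under facebook/ on the Hub -- substitute the
# language code you need):
#
#   audio, rate = synth_mms("Hello, world!", "mms-tts-eng")
#   wavfile.write("mms_example.wav", rate, audio.squeeze())
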
def synth_coqui(text: str, model: str):
    '''
    Use the Coqui TTS API to synthesize text.

    Inputs:
        text:  Text to synthesize
        model: Model code
    Returns:
        Synthesized audio as a NumPy array and the sampling rate.
    '''
    if model is not None:
        # Get device
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Init TTS
        tts = TTS(model, progress_bar=False).to(device)

        # Synthesize to a temporary file, read it back, then clean up.
        # (is_multi_speaker is not a tts_to_file() parameter; multi-speaker
        # models take a speaker=... argument instead.)
        tts.tts_to_file(text=text, file_path="test.wav")
        sampling_rate, wav = wavfile.read("test.wav")
        os.remove("test.wav")

        return wav, sampling_rate
    else:
        return None
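
# Example usage (a sketch; "tts_models/en/ljspeech/tacotron2-DDC" is a
# single-speaker English model from Coqui's released-models list):
#
#   wav, rate = synth_coqui("Hello, world!", "tts_models/en/ljspeech/tacotron2-DDC")
#   wavfile.write("coqui_example.wav", rate, wav)
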
def synth_espeakng(text: str, model: str):
    '''
    Use eSpeak NG to synthesize text.

    Inputs:
        text:  Text to synthesize
        model: Model code (an eSpeak NG voice name)
    Returns:
        Synthesized audio as a NumPy array and the sampling rate.
    '''
    if model is not None:
        # Make sure espeak-ng is installed -- not the most elegant solution;
        # can swap for the Python API (espeakng) when that is resolved.
        subprocess.run(["apt-get", "install", "-y", "espeak-ng"])

        # "-w" and the output file name must be separate argv elements.
        subprocess.run(["espeak-ng", f"-v{model}", "-w", "test.wav", text])

        sampling_rate, wav = wavfile.read("test.wav")
        os.remove("test.wav")

        return wav, sampling_rate
    else:
        return None
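
# Example usage (a sketch; "en" is a standard eSpeak NG voice -- run
# `espeak-ng --voices` to list the voice codes available on your system):
#
#   wav, rate = synth_espeakng("Hello, world!", "en")
#   wavfile.write("espeak_example.wav", rate, wav)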