Spaces:

ntt123
/

vietTTS

Running

vietTTS / app.py

Update app.py

fd46c98 almost 3 years ago

1.45 kB

	from vietTTS.hifigan.mel2wave import mel2wave
	from vietTTS.nat.text2mel import text2mel
	from vietTTS import nat_normalize_text
	import numpy as np
	import gradio as gr
	import os


	def download_assets():
	    """Download the model checkpoints and text resources into the working directory.

	    Each asset is fetched by running a shell command (``gdown`` for the three
	    pickle checkpoints, ``wget`` for ``config.json`` and ``lexicon.txt``).

	    An asset that already exists as a non-empty file is skipped. Besides
	    avoiding redundant downloads on restart, this stops ``wget`` (invoked
	    without ``-O``) from saving a duplicate under a ``.1`` suffix next to the
	    existing file.

	    A command that exits with a non-zero status triggers a ``RuntimeWarning``
	    instead of being ignored silently; the remaining assets are still tried.
	    """
	    import warnings  # local import: only needed by this function

	    # (file the command is expected to produce, shell command)
	    assets = (
	        (
	            "duration_latest_ckpt.pickle",
	            "gdown --id 16UhN8QBxG1YYwUh8smdEeVnKo9qZhvZj -O duration_latest_ckpt.pickle",
	        ),
	        (
	            "acoustic_latest_ckpt.pickle",
	            "gdown --id 1-8Ig65S3irNHSzcskT37SLgeyuUhjKdj -O acoustic_latest_ckpt.pickle",
	        ),
	        (
	            "hk_hifi.pickle",
	            "gdown --id 19cRNDC6IrHFAAE4U9I7K0mzLMgPsi5zb -O hk_hifi.pickle",
	        ),
	        (
	            "config.json",
	            "wget https://raw.githubusercontent.com/NTT123/vietTTS/master/assets/hifigan/config.json",
	        ),
	        (
	            "lexicon.txt",
	            "wget https://raw.githubusercontent.com/NTT123/vietTTS/master/assets/infore/lexicon.txt",
	        ),
	    )

	    for filename, command in assets:
	        # An empty file is treated as missing so a previously failed attempt
	        # does not block a retry.
	        if os.path.isfile(filename) and os.path.getsize(filename) > 0:
	            continue
	        status = os.system(command)
	        if status != 0:
	            warnings.warn(
	                f"asset download failed (exit status {status}): {command}",
	                RuntimeWarning,
	            )

	def text_to_speech(text):
	    """Synthesize ``text`` and return the waveform as 16-bit integer samples.

	    The input is truncated to 500 characters, normalized with
	    ``nat_normalize_text``, converted to a mel spectrogram by ``text2mel``
	    and then to a waveform by ``mel2wave``.

	    Args:
	        text: The string to synthesize.

	    Returns:
	        The waveform scaled by ``2**15`` and converted to ``np.int16``.
	    """
	    # prevent too long text (slicing is a no-op for shorter strings)
	    text = nat_normalize_text(text[:500])
	    mel = text2mel(
	        text,
	        "lexicon.txt",
	        0.2,  # NOTE(review): meaning of this positional argument is not visible here
	        "acoustic_latest_ckpt.pickle",
	        "duration_latest_ckpt.pickle",
	    )
	    wave = mel2wave(mel, "config.json", "hk_hifi.pickle")
	    # Assumes `wave` is a float array nominally in [-1, 1] -- TODO confirm.
	    # A sample at +1.0 scales to 32768, which does not fit in int16 and would
	    # wrap to -32768 on the cast; clip to the representable range first.
	    # In-range samples are unaffected.
	    scaled = np.clip(wave * (2**15), -(2**15), 2**15 - 1)
	    return scaled.astype(np.int16)


	def speak(text):
	    """Synthesize ``text`` and return it as a ``(16000, samples)`` pair.

	    The first element is presumably the sample rate in Hz expected by the
	    audio output component; the second is whatever ``text_to_speech`` returns.
	    """
	    sample_rate = 16_000
	    samples = text_to_speech(text)
	    return sample_rate, samples


	# Fetch checkpoints and resources before the interface is built.
	download_assets()

	title = "vietTTS"
	description = "A vietnamese text-to-speech demo."

	# Text-in / audio-out interface around `speak`, with screenshots and
	# flagging turned off.
	demo = gr.Interface(
	    fn=speak,
	    inputs="text",
	    outputs="audio",
	    title=title,
	    description=description,
	    theme="default",
	    allow_screenshot=False,
	    allow_flagging="never",
	)
	demo.launch(debug=False)