|
from typing import Union |
|
|
|
from modules.SentenceSplitter import SentenceSplitter |
|
from modules.speaker import Speaker |
|
from modules.ssml_parser.SSMLParser import SSMLSegment |
|
from modules.SynthesizeSegments import SynthesizeSegments, combine_audio_segments |
|
from modules.utils import audio |
|
|
|
|
|
def synthesize_audio(
    text: str,
    temperature: float = 0.3,
    top_P: float = 0.7,
    top_K: float = 20,
    spk: Union[int, Speaker] = -1,
    infer_seed: int = -1,
    use_decoder: bool = True,
    prompt1: str = "",
    prompt2: str = "",
    prefix: str = "",
    batch_size: int = 1,
    spliter_threshold: int = 100,
    end_of_sentence="",
):
    """Synthesize ``text`` sentence by sentence and return the joined audio.

    The text is split with ``SentenceSplitter(spliter_threshold)``; every
    resulting sentence becomes one ``SSMLSegment`` that carries the same
    generation parameters. The segments are rendered by
    ``SynthesizeSegments``, merged with ``combine_audio_segments`` and
    converted with ``audio.pydub_to_np``.

    Args:
        text: Input text to synthesize.
        temperature: Forwarded unchanged as the segment's ``temperature``.
        top_P: Forwarded unchanged as the segment's ``top_P``.
        top_K: Forwarded unchanged as the segment's ``top_K``.
        spk: Speaker, either an ``int`` or a ``Speaker`` instance; forwarded
            unchanged as the segment's ``spk``.
        infer_seed: Forwarded unchanged as the segment's ``infer_seed``.
        use_decoder: Forwarded unchanged as the segment's ``use_decoder``.
        prompt1: Forwarded unchanged as the segment's ``prompt1``.
        prompt2: Forwarded unchanged as the segment's ``prompt2``.
        prefix: Forwarded unchanged as the segment's ``prefix``.
        batch_size: Passed to ``SynthesizeSegments`` as ``batch_size``.
        spliter_threshold: Passed both to ``SentenceSplitter`` and to
            ``SynthesizeSegments`` (as ``spliter_thr``).
        end_of_sentence: Passed to ``SynthesizeSegments`` as ``eos``.

    Returns:
        Whatever ``audio.pydub_to_np`` produces for the combined audio.
        NOTE(review): the exact return shape is defined by that helper and
        is not visible here -- confirm against ``modules.utils.audio``.
    """

    def _build_params():
        # A fresh dict per segment, so segments never share one mutable
        # params object.
        return {
            "temperature": temperature,
            "top_P": top_P,
            "top_K": top_K,
            "spk": spk,
            "infer_seed": infer_seed,
            "use_decoder": use_decoder,
            "prompt1": prompt1,
            "prompt2": prompt2,
            "prefix": prefix,
        }

    # Step 1: break the input into sentence-sized pieces.
    pieces = SentenceSplitter(spliter_threshold).parse(text)

    # Step 2: wrap every piece in a segment with identical settings.
    segments = []
    for piece in pieces:
        segments.append(SSMLSegment(text=piece, params=_build_params()))

    # Step 3: render the segments, then merge and convert the result.
    renderer = SynthesizeSegments(
        batch_size=batch_size,
        eos=end_of_sentence,
        spliter_thr=spliter_threshold,
    )
    rendered = renderer.synthesize_segments(segments)
    merged = combine_audio_segments(rendered)
    return audio.pydub_to_np(merged)
|
|