File size: 1,504 Bytes
02e90e4 01e655b d2b7e94 01e655b 1df74c6 d2b7e94 d5b3cd8 01e655b 02e90e4 01e655b 1df74c6 01e655b 1df74c6 01e655b 1df74c6 01e655b 1df74c6 d5b3cd8 1df74c6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
from typing import Union
from modules.SentenceSplitter import SentenceSplitter
from modules.speaker import Speaker
from modules.ssml_parser.SSMLParser import SSMLSegment
from modules.SynthesizeSegments import SynthesizeSegments, combine_audio_segments
from modules.utils import audio
def synthesize_audio(
    text: str,
    temperature: float = 0.3,
    top_P: float = 0.7,
    top_K: float = 20,
    spk: Union[int, Speaker] = -1,
    infer_seed: int = -1,
    use_decoder: bool = True,
    prompt1: str = "",
    prompt2: str = "",
    prefix: str = "",
    batch_size: int = 1,
    spliter_threshold: int = 100,
    end_of_sentence="",
):
    """Synthesize speech for ``text`` and return it in NumPy form.

    The text is cut into sentences by ``SentenceSplitter``; every sentence is
    wrapped in an ``SSMLSegment`` that carries the same generation
    parameters; the segments are rendered by ``SynthesizeSegments`` and the
    resulting pieces are joined with ``combine_audio_segments``.

    Args:
        text: Input text to be spoken.
        temperature: Stored under ``"temperature"`` in each segment's params.
        top_P: Stored under ``"top_P"`` in each segment's params.
        top_K: Stored under ``"top_K"`` in each segment's params.
        spk: Speaker id or ``Speaker`` object, stored under ``"spk"``.
        infer_seed: Stored under ``"infer_seed"`` in each segment's params.
        use_decoder: Stored under ``"use_decoder"`` in each segment's params.
        prompt1: Stored under ``"prompt1"`` in each segment's params.
        prompt2: Stored under ``"prompt2"`` in each segment's params.
        prefix: Stored under ``"prefix"`` in each segment's params.
        batch_size: Passed to ``SynthesizeSegments`` as ``batch_size``.
        spliter_threshold: Passed both to ``SentenceSplitter`` and to
            ``SynthesizeSegments`` (as ``spliter_thr``).
        end_of_sentence: Passed to ``SynthesizeSegments`` as ``eos``.

    Returns:
        Whatever ``audio.pydub_to_np`` produces for the combined audio.
    """
    # Parameters are identical for every sentence; each segment still gets
    # its own dict (a copy) so segments never share a mutable mapping.
    generation_params = {
        "temperature": temperature,
        "top_P": top_P,
        "top_K": top_K,
        "spk": spk,
        "infer_seed": infer_seed,
        "use_decoder": use_decoder,
        "prompt1": prompt1,
        "prompt2": prompt2,
        "prefix": prefix,
    }

    sentence_list = SentenceSplitter(spliter_threshold).parse(text)

    segments = []
    for sentence in sentence_list:
        segments.append(
            SSMLSegment(text=sentence, params=dict(generation_params))
        )

    engine = SynthesizeSegments(
        batch_size=batch_size,
        eos=end_of_sentence,
        spliter_thr=spliter_threshold,
    )
    rendered = engine.synthesize_segments(segments)
    merged = combine_audio_segments(rendered)
    return audio.pydub_to_np(merged)
|