# Scraped page header (not code): zhzluke96 · update · 01e655b · raw · history blame · 2.86 kB
from pydantic import BaseModel
from typing import Any
import torch
from modules.speaker import Speaker, speaker_mgr
from modules.data import styles_mgr
from pydub import AudioSegment
from modules.ssml import merge_prompt
from enum import Enum
class ParamsTypeError(Exception):
    """Signals that a caller-supplied parameter has an unacceptable type.

    For example, it is raised when a numeric value is passed as ``style``.
    """
class AudioFormat(str, Enum):
    """Audio format names.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    mp3 = "mp3"
    wav = "wav"
class BaseResponse(BaseModel):
    """Generic response envelope: a text message plus an arbitrary payload."""

    message: str
    data: Any

    class Config:
        # Encoders used when these types are serialized to JSON:
        # tensors become nested lists, speakers use their own to_json().
        json_encoders = {
            torch.Tensor: lambda tensor: tensor.tolist(),
            Speaker: lambda speaker: speaker.to_json(),
        }
def wav_to_mp3(wav_data, bitrate="48k"):
    """Re-encode WAV audio as MP3.

    Args:
        wav_data: WAV source passed straight to ``AudioSegment.from_wav``
            (presumably a path or file-like object; confirm against callers).
        bitrate: Bitrate string handed to the MP3 exporter.

    Returns:
        Whatever ``AudioSegment.export`` returns when it is called without
        an explicit output target.
    """
    segment = AudioSegment.from_wav(wav_data)
    mp3_output = segment.export(format="mp3", bitrate=bitrate)
    return mp3_output
def to_number(value, t, default=0):
    """Convert ``value`` with the callable ``t``, falling back to ``default``.

    Args:
        value: Raw input to convert (for example a string or ``None``).
        t: Conversion callable such as ``int`` or ``float``.
        default: Value returned when the conversion fails.

    Returns:
        ``t(value)`` on success, otherwise ``default``.
    """
    try:
        return t(value)
    # OverflowError covers cases like int(float("inf")), which is a failed
    # conversion just like a ValueError or TypeError.
    except (ValueError, TypeError, OverflowError):
        return default
def calc_spk_style(spk: str | int, style: str | int) -> dict[str, Any]:
    """Resolve speaker and style inputs into a dict of voice attributes.

    Args:
        spk: An integer (used as-is), a string of decimal digits (converted
            to ``int``), or a name looked up via ``speaker_mgr.get_speaker``.
            Any other value, or a name with no truthy lookup result, leaves
            the speaker unset.
        style: Name of a style whose parameters are fetched with
            ``styles_mgr.find_params_by_name``. An empty string selects no
            style.

    Returns:
        A dict with the non-``None`` entries among ``spk``, ``seed``,
        ``temperature``, ``prefix``, ``prompt1`` and ``prompt2``, plus any
        other non-``None`` entries ``merge_prompt`` leaves in the dict.
        Style parameters take precedence over the value derived from ``spk``.

    Raises:
        ParamsTypeError: If ``style`` is an ``int``/``float`` or a string
            made up only of digits.
    """
    voice_attrs = {}
    params = {}

    # Exact type checks are kept deliberately: subclasses such as bool are
    # not matched, exactly as before.
    if type(spk) is int:
        voice_attrs["spk"] = spk
    elif type(spk) is str:
        # isdecimal(), unlike isdigit(), only accepts characters that int()
        # can parse. Inputs such as "²" therefore fall through to the name
        # lookup instead of raising an uncaught ValueError.
        if spk.isdecimal():
            voice_attrs["spk"] = int(spk)
        else:
            speaker = speaker_mgr.get_speaker(spk)
            if speaker:
                voice_attrs["spk"] = speaker

    if type(style) in (int, float):
        raise ParamsTypeError("The style parameter cannot be a number.")
    if type(style) is str and style != "":
        if style.isdigit():
            raise ParamsTypeError("The style parameter cannot be a number.")
        style_params = styles_mgr.find_params_by_name(style)
        for key, value in style_params.items():
            params[key] = value

    # NOTE(review): merge_prompt is defined in modules.ssml; presumably it
    # merges prompt-related entries between the two dicts. Confirm there.
    merge_prompt(voice_attrs, params)

    # (voice attribute key, style parameter key). The style parameter that
    # feeds "temperature" is named "temp".
    overrides = (
        ("spk", "spk"),
        ("seed", "seed"),
        ("temperature", "temp"),
        ("prefix", "prefix"),
        ("prompt1", "prompt1"),
        ("prompt2", "prompt2"),
    )
    for attr_key, param_key in overrides:
        voice_attrs[attr_key] = params.get(param_key, voice_attrs.get(attr_key))

    # Map the symbolic temperature values onto concrete numbers.
    if voice_attrs["temperature"] == "min":
        # ref: https://github.com/2noise/ChatTTS/issues/123#issue-2326908144
        voice_attrs["temperature"] = 0.000000000001
    elif voice_attrs["temperature"] == "max":
        voice_attrs["temperature"] = 1

    # Only attributes that actually received a value are returned.
    return {key: value for key, value in voice_attrs.items() if value is not None}