# Hugging Face Space: YoMioAI/GPT-SoVITS-3s-cloning-free-TTS
# (page header captured with this file -- Space status: Running; file size: 1,932 bytes)

import io
import os
import pickle
import re

import soundfile as sf
import numpy as np
from pydub import AudioSegment
from pyloudnorm import Meter

# Make the directory containing this file the working directory, so that any
# relative paths used afterwards are resolved from there.
_script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(_script_dir)

def normalize_audio_loudness(data: bytes, target_loudness: float = -23.0) -> bytes:
    """Normalize MP3 audio to a target integrated loudness.

    The input is decoded with pydub, its integrated loudness is measured with
    pyloudnorm, a constant gain is applied to reach ``target_loudness``, the
    result is passed through a ``tanh`` soft limiter, and the audio is
    re-encoded as MP3 at the original sample rate.

    Args:
        data: MP3-encoded audio.
        target_loudness: Desired integrated loudness, in the unit reported by
            ``Meter.integrated_loudness`` (LUFS).

    Returns:
        The normalized audio, MP3-encoded. If the measured loudness is not
        finite (e.g. the input is digital silence) no meaningful gain exists
        and ``data`` is returned unchanged.

    Raises:
        ValueError: Propagated from pyloudnorm when the audio is shorter than
            the meter's gating block (NOTE(review): 0.4 s with the default
            ``Meter`` settings -- confirm against the installed version).
    """
    segment = AudioSegment.from_file(io.BytesIO(data), format='mp3')
    sr = segment.frame_rate
    channels = segment.channels

    # pydub yields integer PCM; the loudness meter expects floats in [-1, 1],
    # so scale by the full-scale value of the decoded sample width.
    full_scale = float(1 << (8 * segment.sample_width - 1))
    samples = np.array(segment.get_array_of_samples(), dtype=np.float64) / full_scale

    # Multi-channel samples come back interleaved (L, R, L, R, ...). Reshape
    # them to (frames, channels); treating them as one mono stream would both
    # skew the measurement and double the duration of the written file.
    if channels > 1:
        samples = samples.reshape(-1, channels)

    # Measure integrated loudness.
    meter = Meter(sr)
    loudness = meter.integrated_loudness(samples)

    # Silence measures as -inf (or NaN): the gain would be infinite and the
    # samples would turn into NaN, so hand the input back untouched.
    if not np.isfinite(loudness):
        return data

    # Compute the gain needed to reach the target and apply it.
    gain_db = target_loudness - loudness
    gain_linear = 10 ** (gain_db / 20.0)
    balanced_audio = samples * gain_linear

    # Soft-limit so that peaks cannot exceed full scale (prevents clipping).
    balanced_audio = np.tanh(balanced_audio)

    # Convert back to 16-bit PCM and encode to MP3 in memory.
    pcm = (balanced_audio * 32767).astype(np.int16)
    byte_io = io.BytesIO()
    sf.write(byte_io, pcm, sr, format='mp3')
    return byte_io.getvalue()

def get_length(text: str) -> float:
    """Return a weighted "effective length" of ``text``.

    The text is split into tokens (runs of word characters, single
    punctuation/symbol characters, and runs of whitespace) and each token
    contributes:

    * its character count, if it consists solely of characters in the ranges
      U+4E00-U+9FFF, U+3040-U+30FF or U+3400-U+4DBF (CJK ideographs and
      Japanese kana);
    * ``1``, if it is any other run of word characters (including runs that
      mix the above characters with other word characters);
    * half its character count otherwise (punctuation and whitespace).
    """
    token_re = re.compile(r"\b\w+\b|[^\w\s]|\s+")
    cjk_only_re = re.compile(r"^[\u4e00-\u9fff\u3040-\u30ff\u3400-\u4dbf]+$")
    word_re = re.compile(r"^\w+$")

    def weight(token: str) -> float:
        # Order matters: CJK-only tokens are also word tokens, so test them first.
        if cjk_only_re.match(token):
            return len(token)
        if word_re.match(token):
            return 1
        return len(token) * 0.5

    return sum(weight(token) for token in token_re.findall(text))

if __name__ == "__main__":
    # Minimal command-line entry point: normalize one MP3 file into another.
    # (Calling normalize_audio_loudness() with no arguments, as before, always
    # raised TypeError because ``data`` is a required parameter.)
    # Note: this module changes the working directory to its own folder at
    # import time, so relative paths are resolved from there.
    import sys

    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} INPUT.mp3 OUTPUT.mp3")

    with open(sys.argv[1], "rb") as src:
        normalized = normalize_audio_loudness(src.read())
    with open(sys.argv[2], "wb") as dst:
        dst.write(normalized)