File size: 1,929 Bytes
f80c5ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import numpy as np
import librosa
from scipy import signal
from .slicer2 import Slicer


class Preprocessor:
    """Turn a waveform into a list of normalized, overlapping audio slices.

    Processing steps (see ``preprocess_audio``):

    1. High-pass filter the waveform (5th-order Butterworth, 48 Hz cutoff,
       applied forward and backward with ``scipy.signal.filtfilt``).
    2. Cut it into segments with ``Slicer``.
    3. Window every segment into pieces of ``max_slice_length`` seconds whose
       start points are ``max_slice_length - overlap`` seconds apart.
    4. Normalize each piece with ``norm``.

    Args:
        sr: Sample rate in Hz; used for the slicer, the filter design and
            for converting the second-based lengths into sample counts.
        max_slice_length: Length in seconds of every full window.
        min_slice_length: A trailing piece is kept only if it is strictly
            longer than this many seconds.

    Raises:
        ValueError: If ``max_slice_length`` is not greater than the fixed
            window overlap, since the windowing loop could then never advance.
    """

    def __init__(
        self, sr: int, max_slice_length: float = 3.0, min_slice_length: float = 0.5
    ):
        # Seconds shared by two consecutive windows of the same segment.
        self.overlap = 0.3
        if max_slice_length <= self.overlap:
            # A non-positive hop would make the windowing loop spin forever.
            raise ValueError(
                f"max_slice_length must be greater than the overlap "
                f"({self.overlap}s), got {max_slice_length}"
            )

        self.slicer = Slicer(
            sr=sr,
            threshold=-42,
            min_length=1500,
            min_interval=400,
            hop_size=15,
            max_sil_kept=500,
        )
        self.sr = sr
        # Because ``fs`` is given, ``Wn`` is expressed in Hz.
        self.bh, self.ah = signal.butter(N=5, Wn=48, btype="high", fs=self.sr)
        self.max_slice_length = max_slice_length
        # Fixed: this used to overwrite ``max_slice_length`` and left
        # ``min_slice_length`` undefined.
        self.min_slice_length = min_slice_length
        # A segment remainder no longer than this (seconds) is emitted as one
        # final piece instead of being windowed further.
        self.tail = self.max_slice_length + self.overlap
        # Peak amplitude targeted by the fully normalized signal.
        self.max = 0.9
        # Blend weight: ``alpha`` of the peak-normalized signal plus
        # ``1 - alpha`` of the untouched signal.
        self.alpha = 0.75

    def norm(self, samples: np.ndarray) -> np.ndarray:
        """Blend a peak-normalized copy of ``samples`` with the original.

        The result is ``alpha * (samples / peak * max) + (1 - alpha) * samples``
        where ``peak`` is the largest absolute sample value.

        Args:
            samples: Audio samples to normalize.

        Returns:
            A new array of the same shape. Empty or all-zero input cannot be
            peak-normalized, so only the ``(1 - alpha)`` term is returned
            (i.e. silence stays silence) instead of NaNs.
        """
        peak = np.abs(samples).max() if samples.size else 0.0
        if peak == 0:
            # Avoid 0/0; the normalized term would contribute nothing anyway.
            return samples * (1 - self.alpha)
        scaled = samples / peak * self.max
        return (scaled * self.alpha) + (samples * (1 - self.alpha))

    def preprocess_audio(self, y: np.ndarray) -> list[np.ndarray]:
        """Filter, segment, window and normalize a waveform.

        Args:
            y: Waveform sampled at ``self.sr``.

        Returns:
            The normalized pieces in order of appearance. Full windows hold
            ``int(max_slice_length * sr)`` samples; the last piece of each
            segment holds whatever remains and is dropped unless it is longer
            than ``min_slice_length`` seconds.

        Raises:
            ValueError: Propagated from ``scipy.signal.filtfilt`` when ``y``
                is too short for the filter's default edge padding.
        """
        y = signal.filtfilt(self.bh, self.ah, y)
        window = int(self.max_slice_length * self.sr)
        audios = []
        for audio in self.slicer.slice(y):
            index = 0
            while True:
                start = int(self.sr * (self.max_slice_length - self.overlap) * index)
                index += 1
                remaining = len(audio) - start
                if remaining > self.tail * self.sr:
                    # Enough material left for a full window plus a follow-up.
                    audios.append(self.norm(audio[start : start + window]))
                    continue
                # Final piece of this segment: keep it only if long enough.
                chunk = audio[start:]
                if len(chunk) > self.min_slice_length * self.sr:
                    audios.append(self.norm(chunk))
                break
        return audios

    def preprocess_file(self, file_path: str) -> list[np.ndarray]:
        """Load ``file_path`` at ``self.sr`` with librosa and preprocess it.

        Args:
            file_path: Path of the audio file to read.

        Returns:
            The result of ``preprocess_audio`` on the loaded waveform.
        """
        y, _ = librosa.load(file_path, sr=self.sr)
        return self.preprocess_audio(y)