File size: 2,977 Bytes
d1b91e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import subprocess
import librosa
import numpy as np
from data_gen.tts.wav_processors.base_processor import BaseWavProcessor, register_wav_processors
from utils.audio import trim_long_silences
from utils.audio.io import save_wav
from utils.audio.rnnoise import rnnoise
from utils.commons.hparams import hparams


@register_wav_processors(name='sox_to_wav')
class ConvertToWavProcessor(BaseWavProcessor):
    """Convert a non-wav audio file to wav by invoking the ``sox`` command."""

    @property
    def name(self):
        """Short identifier of this processor."""
        return 'ToWav'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Return a wav version of ``input_fn``.

        Args:
            input_fn: Path of the audio file to convert.
            sr: Sample rate; passed through unchanged.
            tmp_dir, processed_dir, item_name, preprocess_args: Unused here;
                accepted so all processors share one signature.

        Returns:
            ``(path, sr)``. ``path`` is ``input_fn`` itself when it already
            ends with ``.wav`` (case-sensitive), otherwise the path produced
            by ``self.output_fn(input_fn)``.

        Raises:
            subprocess.CalledProcessError: If ``sox`` exits with a non-zero status.
            FileNotFoundError: If the ``sox`` executable cannot be found.
        """
        if input_fn.endswith('.wav'):
            return input_fn, sr

        output_fn = self.output_fn(input_fn)
        # Pass an argument list instead of a shell string so that file names
        # containing quotes, `$`, backticks, etc. cannot break or inject into
        # the command line.
        subprocess.check_call(['sox', '-v', '0.95', input_fn, '-t', 'wav', output_fn])
        return output_fn, sr


@register_wav_processors(name='sox_resample')
class ResampleProcessor(BaseWavProcessor):
    """Resample an audio file to the requested rate with ``sox``, then trim it."""

    @property
    def name(self):
        """Short identifier of this processor."""
        return 'Resample'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Resample ``input_fn`` to ``sr`` when its sample rate differs.

        Args:
            input_fn: Path of the audio file to resample.
            sr: Target sample rate.
            tmp_dir, processed_dir, item_name, preprocess_args: Unused here;
                accepted so all processors share one signature.

        Returns:
            ``(path, sr)``. ``path`` is ``input_fn`` when the file is already
            at ``sr``; otherwise it is the path from ``self.output_fn(input_fn)``
            holding the resampled audio after ``librosa.effects.trim``.

        Raises:
            subprocess.CalledProcessError: If ``sox`` exits with a non-zero status.
            FileNotFoundError: If the ``sox`` executable cannot be found.
        """
        output_fn = self.output_fn(input_fn)
        sr_file = librosa.core.get_samplerate(input_fn)
        if sr == sr_file:
            return input_fn, sr

        # Argument list (no shell) so special characters in paths are safe.
        subprocess.check_call(['sox', '-v', '0.95', input_fn, '-r', str(sr), output_fn])
        # Load the file sox just wrote. Previously `input_fn` was re-read and
        # resampled by librosa, and the sox result was overwritten unused.
        y, _ = librosa.core.load(output_fn, sr=sr)
        y, _ = librosa.effects.trim(y)
        save_wav(y, output_fn, sr)
        return output_fn, sr


@register_wav_processors(name='trim_sil')
class TrimSILProcessor(BaseWavProcessor):
    """Trim the audio with ``librosa.effects.trim`` and save the result."""

    @property
    def name(self):
        """Short identifier of this processor."""
        return 'TrimSIL'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Load ``input_fn`` at ``sr``, trim it, and write it to a new file.

        Args:
            input_fn: Path of the audio file to trim.
            sr: Sample rate used for loading and saving.
            tmp_dir, processed_dir, item_name, preprocess_args: Unused here;
                accepted so all processors share one signature.

        Returns:
            ``(output_fn, sr)`` where ``output_fn`` comes from
            ``self.output_fn(input_fn)``.
        """
        output_fn = self.output_fn(input_fn)
        y, _ = librosa.core.load(input_fn, sr=sr)
        y, _ = librosa.effects.trim(y)
        save_wav(y, output_fn, sr)
        # Return the same (path, sr) pair as every other processor in this
        # file; the bare path returned before could not be unpacked by callers.
        return output_fn, sr


@register_wav_processors(name='trim_all_sil')
class TrimAllSILProcessor(BaseWavProcessor):
    """Trim silences via ``trim_long_silences`` and optionally save the mask."""

    @property
    def name(self):
        """Short identifier of this processor."""
        # NOTE(review): identical to TrimSILProcessor.name; if output paths are
        # derived from `name`, the two processors would write to the same file
        # when chained — confirm against BaseWavProcessor.output_fn.
        return 'TrimSIL'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Trim ``input_fn`` and write the result to a new file.

        Args:
            input_fn: Path of the audio file to process.
            sr: Sample rate used when saving the trimmed audio.
            tmp_dir: Unused here.
            processed_dir: Directory under which ``sil_mask/`` is created when
                the mask is saved.
            item_name: Base name of the saved mask file (``{item_name}.npy``).
            preprocess_args: Mapping of options. Reads
                ``vad_max_silence_length`` (default 12) and
                ``save_sil_mask`` (default False).

        Returns:
            ``(output_fn, sr)`` where ``output_fn`` comes from
            ``self.output_fn(input_fn)``.
        """
        output_fn = self.output_fn(input_fn)
        # NOTE(review): `sr` is not forwarded to trim_long_silences; this
        # presumably assumes the returned audio is already at `sr` — confirm.
        y, audio_mask, _ = trim_long_silences(
            input_fn, vad_max_silence_length=preprocess_args.get('vad_max_silence_length', 12))
        save_wav(y, output_fn, sr)
        # Treat a missing option as "do not save", mirroring the `.get` with
        # default used for `vad_max_silence_length`, instead of raising KeyError.
        if preprocess_args.get('save_sil_mask', False):
            mask_dir = f'{processed_dir}/sil_mask'
            os.makedirs(mask_dir, exist_ok=True)
            np.save(f'{mask_dir}/{item_name}.npy', audio_mask)
        return output_fn, sr


@register_wav_processors(name='denoise')
class DenoiseProcessor(BaseWavProcessor):
    """Run the ``rnnoise`` helper on an audio file."""

    @property
    def name(self):
        """Short identifier of this processor."""
        return 'Denoise'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Denoise ``input_fn`` into a new file.

        Args:
            input_fn: Path of the audio file to denoise.
            sr: Sample rate, forwarded to ``rnnoise`` as ``out_sample_rate``.
            tmp_dir, processed_dir, item_name, preprocess_args: Unused here;
                accepted so all processors share one signature.

        Returns:
            ``(denoised_path, sr)`` where ``denoised_path`` comes from
            ``self.output_fn(input_fn)``.
        """
        denoised_path = self.output_fn(input_fn)
        rnnoise(input_fn, denoised_path, out_sample_rate=sr)
        result = (denoised_path, sr)
        return result