Spaces:
Runtime error
Runtime error
File size: 2,977 Bytes
d1b91e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import os
import subprocess
import librosa
import numpy as np
from data_gen.tts.wav_processors.base_processor import BaseWavProcessor, register_wav_processors
from utils.audio import trim_long_silences
from utils.audio.io import save_wav
from utils.audio.rnnoise import rnnoise
from utils.commons.hparams import hparams
@register_wav_processors(name='sox_to_wav')
class ConvertToWavProcessor(BaseWavProcessor):
    """Wav processor that converts non-``.wav`` inputs to WAV using ``sox``."""

    @property
    def name(self):
        """Short tag identifying this processor."""
        return 'ToWav'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Return a WAV path for ``input_fn``, converting it with sox if needed.

        Args:
            input_fn: Path of the audio file to convert.
            sr: Sample rate; passed through unchanged.
            tmp_dir: Unused by this processor.
            processed_dir: Unused by this processor.
            item_name: Unused by this processor.
            preprocess_args: Unused by this processor.

        Returns:
            ``(path, sr)``: ``input_fn`` itself when it already ends in
            ``.wav``, otherwise the path produced by ``self.output_fn``.

        Raises:
            subprocess.CalledProcessError: If sox exits with a non-zero status.
            FileNotFoundError: If the ``sox`` executable cannot be found.
        """
        if input_fn.endswith('.wav'):
            # Already a wav file: nothing to convert.
            return input_fn, sr

        output_fn = self.output_fn(input_fn)
        # Pass an argument list instead of a shell string so that paths
        # containing quotes, `$` or backticks are handed to sox verbatim.
        subprocess.check_call(['sox', '-v', '0.95', input_fn, '-t', 'wav', output_fn])
        return output_fn, sr
@register_wav_processors(name='sox_resample')
class ResampleProcessor(BaseWavProcessor):
    """Wav processor that resamples a file whose sample rate differs from ``sr``."""

    @property
    def name(self):
        """Short tag identifying this processor."""
        return 'Resample'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Resample ``input_fn`` to ``sr`` when its own sample rate differs.

        Args:
            input_fn: Path of the audio file to inspect and resample.
            sr: Target sample rate.
            tmp_dir: Unused by this processor.
            processed_dir: Unused by this processor.
            item_name: Unused by this processor.
            preprocess_args: Unused by this processor.

        Returns:
            ``(path, sr)``: ``input_fn`` unchanged when the file is already at
            ``sr``, otherwise the path produced by ``self.output_fn``.

        Raises:
            subprocess.CalledProcessError: If sox exits with a non-zero status.
            FileNotFoundError: If the ``sox`` executable cannot be found.
        """
        output_fn = self.output_fn(input_fn)
        sr_file = librosa.core.get_samplerate(input_fn)
        if sr == sr_file:
            # Sample rate already matches: hand the input back untouched.
            return input_fn, sr

        # Pass an argument list instead of a shell string so that paths
        # containing quotes, `$` or backticks are handed to sox verbatim.
        subprocess.check_call(['sox', '-v', '0.95', input_fn, '-r', str(sr), output_fn])
        # NOTE(review): the audio is re-read from input_fn (not from the file
        # sox just wrote) and save_wav is given the same output_fn, so the sox
        # result appears to be overwritten. Kept as-is to preserve the current
        # output; confirm whether loading output_fn was intended.
        y, _ = librosa.core.load(input_fn, sr=sr)
        y, _ = librosa.effects.trim(y)
        save_wav(y, output_fn, sr)
        return output_fn, sr
@register_wav_processors(name='trim_sil')
class TrimSILProcessor(BaseWavProcessor):
    """Wav processor that applies ``librosa.effects.trim`` to the audio."""

    @property
    def name(self):
        """Short tag identifying this processor."""
        return 'TrimSIL'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Load ``input_fn`` at ``sr``, trim it, and save the result.

        Args:
            input_fn: Path of the audio file to trim.
            sr: Sample rate used for loading and for saving.
            tmp_dir: Unused by this processor.
            processed_dir: Unused by this processor.
            item_name: Unused by this processor.
            preprocess_args: Unused by this processor.

        Returns:
            ``(output_fn, sr)``, where ``output_fn`` comes from
            ``self.output_fn(input_fn)``.
        """
        output_fn = self.output_fn(input_fn)
        y, _ = librosa.core.load(input_fn, sr=sr)
        y, _ = librosa.effects.trim(y)
        save_wav(y, output_fn, sr)
        # Return (path, sr) like every other processor in this module; a bare
        # path string here would not match the shape the siblings produce.
        return output_fn, sr
@register_wav_processors(name='trim_all_sil')
class TrimAllSILProcessor(BaseWavProcessor):
    """Wav processor that runs ``trim_long_silences`` and optionally saves its mask."""

    @property
    def name(self):
        """Short tag identifying this processor."""
        # NOTE(review): same tag as TrimSILProcessor; possibly a copy-paste
        # leftover. Left unchanged because the tag may feed into output file
        # names - confirm before renaming.
        return 'TrimSIL'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Trim ``input_fn`` with ``trim_long_silences`` and save the result.

        Args:
            input_fn: Path of the audio file to process.
            sr: Sample rate handed to ``save_wav`` and returned to the caller.
            tmp_dir: Unused by this processor.
            processed_dir: Directory under which ``sil_mask/`` is created when
                the mask is saved.
            item_name: Base name of the saved mask file (``<item_name>.npy``).
            preprocess_args: Mapping of options. ``vad_max_silence_length``
                (default 12) is forwarded to ``trim_long_silences``; a truthy
                ``save_sil_mask`` enables saving the mask.

        Returns:
            ``(output_fn, sr)``, where ``output_fn`` comes from
            ``self.output_fn(input_fn)``.
        """
        output_fn = self.output_fn(input_fn)
        # The third value returned by trim_long_silences is discarded.
        y, audio_mask, _ = trim_long_silences(
            input_fn, vad_max_silence_length=preprocess_args.get('vad_max_silence_length', 12))
        save_wav(y, output_fn, sr)
        # Use .get so a missing key means "do not save" instead of a KeyError,
        # matching how vad_max_silence_length is read above.
        if preprocess_args.get('save_sil_mask'):
            os.makedirs(f'{processed_dir}/sil_mask', exist_ok=True)
            np.save(f'{processed_dir}/sil_mask/{item_name}.npy', audio_mask)
        return output_fn, sr
@register_wav_processors(name='denoise')
class DenoiseProcessor(BaseWavProcessor):
    """Wav processor that passes the audio file through ``rnnoise``."""

    @property
    def name(self):
        """Short tag identifying this processor."""
        return 'Denoise'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Run ``rnnoise`` from ``input_fn`` into a new file.

        Only ``input_fn`` and ``sr`` are used; the remaining parameters are
        accepted for interface compatibility and ignored.

        Returns:
            ``(denoised_path, sr)``, where ``denoised_path`` comes from
            ``self.output_fn(input_fn)``.
        """
        denoised_path = self.output_fn(input_fn)
        rnnoise(input_fn, denoised_path, out_sample_rate=sr)
        result = (denoised_path, sr)
        return result
|