Spaces:

pivich
/

sovits-new

Sleeping

sovits-new / so_vits_svc_fork /preprocessing /preprocess_resample.py

Vladimir Alabov

Refactor #3

46b0a70 over 1 year ago

4.45 kB

	from __future__ import annotations

	import warnings
	from logging import getLogger
	from pathlib import Path
	from typing import Iterable

	import librosa
	import soundfile
	from joblib import Parallel, delayed
	from tqdm_joblib import tqdm_joblib

	from .preprocess_utils import check_hubert_min_duration

	LOG = getLogger(__name__)

	# input_dir and output_dir exist.
	# Write code to convert the audio files in input_dir to audio files in output_dir,
	# without changing the folder structure. Use joblib to parallelize.
	# Converting audio files includes:
	# - resampling to specified sampling rate
	# - trim silence
	# - adjust volume in a smart way
	# - save as 16-bit wav file


	def _get_unique_filename(path: Path, existing_paths: Iterable[Path]) -> Path:
	    """Return ``path``, or a numbered variant of it, that is not already taken.

	    If ``path`` is not among ``existing_paths`` it is returned unchanged.
	    Otherwise ``_1``, ``_2``, ... is appended to the stem (the suffix is kept)
	    until a candidate that is not in ``existing_paths`` is found.

	    Args:
	        path: Desired path.
	        existing_paths: Paths that are already in use. Only membership in this
	            iterable is checked; the file system is not consulted.

	    Returns:
	        ``path`` itself, or ``<parent>/<stem>_<i><suffix>`` for the smallest
	        ``i >= 1`` that is not in ``existing_paths``.
	    """
	    # ``existing_paths`` is tested for membership repeatedly. A one-shot
	    # iterator (e.g. a generator) would be consumed by the first test and make
	    # later tests miss entries, so snapshot it. Real containers are used as-is.
	    if iter(existing_paths) is existing_paths:
	        existing_paths = set(existing_paths)

	    if path not in existing_paths:
	        return path

	    i = 1
	    while True:
	        candidate = path.parent / f"{path.stem}_{i}{path.suffix}"
	        if candidate not in existing_paths:
	            return candidate
	        i += 1


	def is_relative_to(path: Path, *other):
	    """Tell whether ``path`` can be expressed relative to ``other``.

	    Stand-in for ``Path.is_relative_to()``, which only exists on Python 3.9+,
	    while Python 3.8 still has to be supported. ``other`` is forwarded
	    unchanged to ``Path.relative_to()``.

	    Returns:
	        True when ``path.relative_to(*other)`` succeeds, False when it raises
	        ``ValueError``.
	    """
	    try:
	        path.relative_to(*other)
	    except ValueError:
	        return False
	    else:
	        return True


	def _preprocess_one(
	    input_path: Path,
	    output_path: Path,
	    sr: int,
	    *,
	    top_db: int,
	    frame_seconds: float,
	    hop_seconds: float,
	) -> None:
	    """Preprocess one audio file and write it as a 16-bit PCM file.

	    Steps: load as mono at the requested sampling rate, peak-normalize,
	    trim leading/trailing silence, and save to ``output_path``. Files that
	    cannot be loaded, that ``check_hubert_min_duration`` rejects (before or
	    after trimming), or that contain no signal are skipped with a log message
	    instead of raising.

	    Args:
	        input_path: Audio file to read.
	        output_path: Destination file; its parent directory is not created here.
	        sr: Target sampling rate passed to ``librosa.load``.
	        top_db: Threshold passed to ``librosa.effects.trim``.
	        frame_seconds: Trim analysis frame length, in seconds.
	        hop_seconds: Trim hop length, in seconds.
	    """
	    try:
	        audio, sr = librosa.load(input_path, sr=sr, mono=True)
	    # Audioread is the last backend it will attempt, so this is the exception
	    # thrown on failure. Kept broad on purpose: any load failure means "skip".
	    except Exception as e:
	        # Failure due to attempting to load a file that is not audio, so return early
	        LOG.warning(f"Failed to load {input_path} due to {e}")
	        return

	    if not check_hubert_min_duration(audio, sr):
	        LOG.info(f"Skip {input_path} because it is too short.")
	        return

	    # Adjust volume: scale so that the largest absolute sample becomes 1.0.
	    # An empty or all-zero (or NaN) signal has no usable peak; dividing by it
	    # would raise or fill the output with NaN/inf, so skip such files instead.
	    peak = max(audio.max(), -audio.min()) if audio.size else 0.0
	    if not peak > 0:
	        LOG.info(f"Skip {input_path} because it is silent.")
	        return
	    audio /= peak

	    # Trim silence
	    audio, _ = librosa.effects.trim(
	        audio,
	        top_db=top_db,
	        frame_length=int(frame_seconds * sr),
	        hop_length=int(hop_seconds * sr),
	    )

	    # Trimming may have shortened the clip below the minimum, so check again.
	    if not check_hubert_min_duration(audio, sr):
	        LOG.info(f"Skip {input_path} because it is too short.")
	        return

	    soundfile.write(output_path, audio, samplerate=sr, subtype="PCM_16")


	def preprocess_resample(
	    input_dir: Path | str,
	    output_dir: Path | str,
	    sampling_rate: int,
	    n_jobs: int = -1,
	    *,
	    top_db: int = 30,
	    frame_seconds: float = 0.1,
	    hop_seconds: float = 0.05,
	) -> None:
	    """Preprocess audio files in input_dir and save them to output_dir.

	    Every file matching ``*.*`` under ``input_dir`` (recursively) whose path
	    relative to ``input_dir`` has at least two parts is treated as
	    ``<speaker>/.../<file>`` and written to ``output_dir/<speaker>/<stem>.wav``.
	    Name clashes inside a speaker folder are resolved by appending ``_1``,
	    ``_2``, ... to the stem. Files located directly in ``input_dir`` are
	    ignored. The per-file work is done by ``_preprocess_one`` in parallel.

	    Args:
	        input_dir: Root directory of the raw dataset.
	        output_dir: Root directory for the processed files; speaker
	            sub-directories are created as needed.
	        sampling_rate: Target sampling rate.
	        n_jobs: Number of joblib workers.
	        top_db: Silence threshold forwarded to ``_preprocess_one``.
	        frame_seconds: Trim frame length in seconds, forwarded as is.
	        hop_seconds: Trim hop length in seconds, forwarded as is.

	    Raises:
	        ValueError: If no candidate file is found under ``input_dir``.
	    """
	    input_dir = Path(input_dir)
	    output_dir = Path(output_dir)

	    # Directories whose names contain a dot also match "*.*"; keep files only.
	    in_paths = [p for p in input_dir.rglob("*.*") if p.is_file()]
	    if not in_paths:
	        raise ValueError(f"No audio files found in {input_dir}")

	    legacy_root = Path("dataset_raw") / "44k"
	    # Inputs and outputs are collected as pairs so that a skipped input can
	    # never shift the pairing between the two.
	    in_and_out_paths = []
	    taken_out_paths = set()
	    for in_path in in_paths:
	        in_path_relative = in_path.relative_to(input_dir)
	        # Legacy layout: dataset_raw/44k/<speaker>/... -> drop the "44k" level.
	        # The extra parts check avoids a ValueError from relative_to("44k")
	        # when input_dir itself already points at dataset_raw/44k.
	        if (
	            not in_path.is_absolute()
	            and is_relative_to(in_path, legacy_root)
	            and in_path_relative.parts[:1] == ("44k",)
	        ):
	            new_in_path_relative = in_path_relative.relative_to("44k")
	            warnings.warn(
	                f"Recommended folder structure has changed since v1.0.0. "
	                "Please move your dataset directly under dataset_raw folder. "
	                f"Recoginzed {in_path_relative} as {new_in_path_relative}"
	            )
	            in_path_relative = new_in_path_relative

	        # A speaker folder is required; skip files that are not inside one.
	        if len(in_path_relative.parts) < 2:
	            continue
	        speaker_name = in_path_relative.parts[0]
	        file_name = in_path_relative.with_suffix(".wav").name
	        out_path = output_dir / speaker_name / file_name
	        out_path = _get_unique_filename(out_path, taken_out_paths)
	        out_path.parent.mkdir(parents=True, exist_ok=True)
	        taken_out_paths.add(out_path)
	        in_and_out_paths.append((in_path, out_path))

	    with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)):
	        Parallel(n_jobs=n_jobs)(
	            delayed(_preprocess_one)(
	                *args,
	                sr=sampling_rate,
	                top_db=top_db,
	                frame_seconds=frame_seconds,
	                hop_seconds=hop_seconds,
	            )
	            for args in in_and_out_paths
	        )