EzAudio-ControlNet

Paused

App Files Files Community

EzAudio-ControlNet / audiotools /data /preprocess.py

OpenSound

Upload 211 files

9d3cb0a verified 2 months ago

raw

history blame contribute delete

2.74 kB

	import csv
	import os
	from pathlib import Path

	from tqdm import tqdm

	from ..core import AudioSignal


	def create_csv(
	    audio_files: list, output_csv: Path, loudness: bool = False, data_path: str = None
	):
	    """Write a CSV file listing audio files, optionally with their loudness.

	    If ``loudness = True``, the output of this function will be a CSV file
	    that looks something like:

	    .. csv-table::
	        :header: path,loudness

	        daps/produced/f1_script1_produced.wav,-16.299999237060547
	        daps/produced/f1_script2_produced.wav,-16.600000381469727
	        daps/produced/f1_script3_produced.wav,-17.299999237060547
	        daps/produced/f1_script4_produced.wav,-16.100000381469727
	        daps/produced/f1_script5_produced.wav,-16.700000762939453
	        daps/produced/f3_script1_produced.wav,-16.5

	    .. note::
	        The paths above are written relative to the ``data_path`` argument.
	        If ``data_path`` is not passed (``None``), every path is written
	        exactly as it was given in ``audio_files``.

	    You can produce a CSV file from a directory of audio files via:

	    >>> import audiotools
	    >>> directory = ...
	    >>> audio_files = audiotools.util.find_audio(directory)
	    >>> output_path = "train.csv"
	    >>> audiotools.data.preprocess.create_csv(
	    >>>     audio_files, output_path, loudness=True
	    >>> )

	    Note that you can create empty rows in the CSV file by passing an empty
	    string or None in the ``audio_files`` list. This is useful if you want to
	    sync multiple CSV files in a multitrack setting. The loudness of these
	    empty rows will be set to -inf.

	    If ``audio_files`` is empty, a CSV containing only the header row is
	    written.

	    Parameters
	    ----------
	    audio_files : list
	        List of audio files. Empty strings and ``None`` produce empty rows.
	    output_csv : Path
	        Output CSV, with each row containing the relative path of every file
	        to ``data_path``, if specified (defaults to None).
	    loudness : bool
	        Compute loudness of entire file and store alongside path.
	    data_path : str, optional
	        Directory that the written paths are made relative to. When ``None``
	        (the default), paths are written unchanged.

	    Raises
	    ------
	    ValueError
	        If ``data_path`` is given and an audio file is not located under it
	        (raised by ``Path.relative_to``).
	    """

	    # The columns depend only on ``loudness``, so they are known up front and
	    # the header can be written even when there are no rows at all.
	    fieldnames = ["path", "loudness"] if loudness else ["path"]

	    info = []
	    pbar = tqdm(audio_files)
	    for af in pbar:
	        # ``Path(None)`` raises TypeError; map None to "" so that both spell
	        # "empty row", as documented above.
	        af = Path("" if af is None else af)
	        pbar.set_description(f"Processing {af.name}")
	        _info = {}
	        if af.name == "":
	            # Placeholder row: no path, and -inf loudness if requested.
	            _info["path"] = ""
	            if loudness:
	                _info["loudness"] = -float("inf")
	        else:
	            _info["path"] = af.relative_to(data_path) if data_path is not None else af
	            if loudness:
	                _info["loudness"] = AudioSignal(af).ffmpeg_loudness().item()

	        info.append(_info)

	    # newline="" lets the csv module control line endings itself; without it
	    # an extra blank line is emitted after every row on Windows.
	    with open(output_csv, "w", newline="") as f:
	        writer = csv.DictWriter(f, fieldnames=fieldnames)
	        writer.writeheader()
	        writer.writerows(info)