Spaces:

Pendrokar
/

xVASynth-TTS

Running on CPU Upgrade

App Files Files Community

xVASynth-TTS / lib /ffmpeg_normalize /_media_file.py

Pendrokar

xVASynth v3 code for English

19c8b95 9 months ago

raw

history blame

13.5 kB

	import os
	import re
	import tempfile
	import shutil
	from tqdm import tqdm
	import shlex

	from ._streams import AudioStream, VideoStream, SubtitleStream
	from ._errors import FFmpegNormalizeError
	from ._cmd_utils import NUL, CommandRunner, DUR_REGEX, to_ms
	from ._logger import setup_custom_logger

	# Module-level logger used by every MediaFile method below; obtained from the
	# package's own setup_custom_logger helper under the name "ffmpeg_normalize".
	logger = setup_custom_logger("ffmpeg_normalize")


	class MediaFile:
	    """
	    Class that holds a file, its streams and adjustments

	    An instance wraps one input file. On construction it runs the configured
	    ffmpeg executable once to discover the audio, video and subtitle streams,
	    and it later drives both normalization passes for that file.
	    """

	    def __init__(self, ffmpeg_normalize, input_file, output_file=None):
	        """
	        Initialize a media file for later normalization.

	        Arguments:
	            ffmpeg_normalize {FFmpegNormalize} -- reference to overall settings
	            input_file {str} -- Path to input file

	        Keyword Arguments:
	            output_file {str} -- Path to output file (default: {None}).
	                It may be omitted for stream inspection only, but it has to
	                be set before run_normalization() is called.

	        Raises:
	            FFmpegNormalizeError -- if the input contains no audio stream
	        """
	        self.ffmpeg_normalize = ffmpeg_normalize
	        self.skip = False
	        self.input_file = input_file
	        self.output_file = output_file
	        # stream id -> stream object, grouped by stream type
	        self.streams = {"audio": {}, "video": {}, "subtitle": {}}

	        self.parse_streams()

	    def _stream_ids(self):
	        """
	        Return the ids of all streams registered so far, in the order
	        audio, video, subtitle.
	        """
	        return (
	            list(self.streams["audio"])
	            + list(self.streams["video"])
	            + list(self.streams["subtitle"])
	        )

	    def __repr__(self):
	        """
	        Represent the media file by the base name of its input path.
	        """
	        return os.path.basename(self.input_file)

	    def parse_streams(self):
	        """
	        Try to parse all input streams from file

	        Runs ffmpeg on the input file, scans its textual output for the
	        "Duration" and "Stream" lines and fills self.streams accordingly.

	        Raises:
	            FFmpegNormalizeError -- if no audio stream was found
	        """
	        logger.debug(f"Parsing streams of {self.input_file}")

	        # Copy zero seconds of every stream to a null output: nothing is
	        # written, only the stream information printed by ffmpeg is used.
	        cmd = [
	            self.ffmpeg_normalize.ffmpeg_exe,
	            "-i",
	            self.input_file,
	            "-c",
	            "copy",
	            "-t",
	            "0",
	            "-map",
	            "0",
	            "-f",
	            "null",
	            NUL,
	        ]

	        cmd_runner = CommandRunner(cmd)
	        cmd_runner.run_command()
	        output = cmd_runner.get_output()

	        logger.debug("Stream parsing command output:")
	        logger.debug(output)

	        output_lines = [line.strip() for line in output.split("\n")]

	        # Duration (in seconds) of the file; handed to every audio stream
	        # that is found after the "Duration" line.
	        duration = None
	        for line in output_lines:
	            if "Duration" in line:
	                duration_search = DUR_REGEX.search(line)
	                if not duration_search:
	                    logger.warning("Could not extract duration from input file!")
	                else:
	                    duration = to_ms(**duration_search.groupdict()) / 1000
	                    logger.debug(f"Found duration: {duration} s")

	            if not line.startswith("Stream"):
	                continue

	            stream_id_match = re.search(r"#0:([\d]+)", line)
	            if not stream_id_match:
	                continue

	            stream_id = int(stream_id_match.group(1))
	            # never register the same stream id twice
	            if stream_id in self._stream_ids():
	                continue

	            if "Audio" in line:
	                logger.debug(f"Found audio stream at index {stream_id}")
	                sample_rate_match = re.search(r"(\d+) Hz", line)
	                sample_rate = (
	                    int(sample_rate_match.group(1)) if sample_rate_match else None
	                )
	                # matches sample formats written like "s16," or "s32p,"
	                bit_depth_match = re.search(r"s(\d+)p?,", line)
	                bit_depth = int(bit_depth_match.group(1)) if bit_depth_match else None
	                self.streams["audio"][stream_id] = AudioStream(
	                    self,
	                    self.ffmpeg_normalize,
	                    stream_id,
	                    sample_rate,
	                    bit_depth,
	                    duration,
	                )

	            elif "Video" in line:
	                logger.debug(f"Found video stream at index {stream_id}")
	                self.streams["video"][stream_id] = VideoStream(
	                    self, self.ffmpeg_normalize, stream_id
	                )

	            elif "Subtitle" in line:
	                logger.debug(f"Found subtitle stream at index {stream_id}")
	                self.streams["subtitle"][stream_id] = SubtitleStream(
	                    self, self.ffmpeg_normalize, stream_id
	                )

	        if not self.streams["audio"]:
	            raise FFmpegNormalizeError(
	                f"Input file {self.input_file} does not contain any audio streams"
	            )

	        # output_file defaults to None in __init__; without an output path
	        # there is no container restriction to enforce, so skip the check
	        # instead of failing inside os.path.splitext().
	        if self.output_file is not None:
	            output_ext = os.path.splitext(self.output_file)[1].lower()
	        else:
	            output_ext = ""

	        if output_ext in (".wav", ".mp3", ".aac") and len(self.streams["audio"]) > 1:
	            logger.warning(
	                "Output file only supports one stream. "
	                "Keeping only first audio stream."
	            )
	            first_stream = next(iter(self.streams["audio"].values()))
	            self.streams["audio"] = {first_stream.stream_id: first_stream}
	            self.streams["video"] = {}
	            self.streams["subtitle"] = {}

	    def run_normalization(self):
	        """
	        Run the first (analysis) pass and then the second (encoding) pass,
	        showing a progress bar for the second pass if progress is enabled.
	        """
	        logger.debug(f"Running normalization for {self.input_file}")

	        # run the first pass to get loudness stats
	        self._first_pass()

	        # run the second pass as a whole
	        if self.ffmpeg_normalize.progress:
	            with tqdm(total=100, position=1, desc="Second Pass") as pbar:
	                for progress in self._second_pass():
	                    # tqdm expects increments, the generator yields totals
	                    pbar.update(progress - pbar.n)
	        else:
	            # the generator still has to be exhausted to do the work
	            for _ in self._second_pass():
	                pass

	    def _first_pass(self):
	        """
	        Collect the normalization statistics of every audio stream and, if
	        requested, append them to the shared stats list of the settings object.
	        """
	        logger.debug(f"Parsing normalization info for {self.input_file}")

	        audio_streams = list(self.streams["audio"].values())
	        use_ebu = self.ffmpeg_normalize.normalization_type == "ebu"

	        for index, audio_stream in enumerate(audio_streams):
	            if use_ebu:
	                fun = audio_stream.parse_loudnorm_stats
	            else:
	                fun = audio_stream.parse_volumedetect_stats

	            if self.ffmpeg_normalize.progress:
	                with tqdm(
	                    total=100,
	                    position=1,
	                    desc=f"Stream {index + 1}/{len(audio_streams)}",
	                ) as pbar:
	                    for progress in fun():
	                        pbar.update(progress - pbar.n)
	            else:
	                for _ in fun():
	                    pass

	        if self.ffmpeg_normalize.print_stats:
	            stats = [audio_stream.get_stats() for audio_stream in audio_streams]
	            self.ffmpeg_normalize.stats.extend(stats)

	    def _get_audio_filter_cmd(self):
	        """
	        Return filter_complex command and output labels needed

	        Each audio stream gets one chain of the form
	        "[0:<id>]<pre_filter>,<normalization>,<post_filter>[norm<id>]"
	        (pre and post filter only if configured); chains are joined by ";".

	        Returns:
	            tuple -- (filter_complex string, list of output labels)
	        """
	        filter_chains = []
	        output_labels = []
	        use_ebu = self.ffmpeg_normalize.normalization_type == "ebu"

	        for audio_stream in self.streams["audio"].values():
	            if use_ebu:
	                normalization_filter = audio_stream.get_second_pass_opts_ebu()
	            else:
	                normalization_filter = audio_stream.get_second_pass_opts_peakrms()

	            input_label = f"[0:{audio_stream.stream_id}]"
	            output_label = f"[norm{audio_stream.stream_id}]"
	            output_labels.append(output_label)

	            filter_chain = []
	            if self.ffmpeg_normalize.pre_filter:
	                filter_chain.append(self.ffmpeg_normalize.pre_filter)
	            filter_chain.append(normalization_filter)
	            if self.ffmpeg_normalize.post_filter:
	                filter_chain.append(self.ffmpeg_normalize.post_filter)

	            filter_chains.append(input_label + ",".join(filter_chain) + output_label)

	        filter_complex_cmd = ";".join(filter_chains)

	        return filter_complex_cmd, output_labels

	    def _second_pass(self):
	        """
	        Construct the second pass command and run it

	        This is a generator: it yields the progress values reported by the
	        command runner (or a single 100 for a dry run). ffmpeg writes into a
	        private temporary directory; the result is moved to self.output_file
	        only on success, and the temporary directory is always removed.

	        FIXME: make this method simpler
	        """
	        logger.info(f"Running second pass for {self.input_file}")

	        settings = self.ffmpeg_normalize

	        # get the target output stream types depending on the options
	        output_stream_types = ["audio"]
	        if not settings.video_disable:
	            output_stream_types.append("video")
	        if not settings.subtitle_disable:
	            output_stream_types.append("subtitle")

	        # base command, here we will add all other options
	        cmd = [settings.ffmpeg_exe, "-y", "-nostdin"]

	        # extra options (if any)
	        if settings.extra_input_options:
	            cmd.extend(settings.extra_input_options)

	        # get complex filter command
	        audio_filter_cmd, output_labels = self._get_audio_filter_cmd()

	        # add input file and basic filter
	        cmd.extend(["-i", self.input_file, "-filter_complex", audio_filter_cmd])

	        # map metadata, only if needed
	        if settings.metadata_disable:
	            cmd.extend(["-map_metadata", "-1"])
	        else:
	            # map global metadata
	            cmd.extend(["-map_metadata", "0"])
	            # map per-stream metadata (e.g. language tags)
	            for stream_type in output_stream_types:
	                if stream_type not in self.streams:
	                    continue
	                # ffmpeg stream specifier letter: a, v or s
	                stream_key = stream_type[0]
	                for idx in range(len(self.streams[stream_type])):
	                    cmd.extend(
	                        [
	                            f"-map_metadata:s:{stream_key}:{idx}",
	                            f"0:s:{stream_key}:{idx}",
	                        ]
	                    )

	        # map chapters if needed
	        if settings.chapters_disable:
	            cmd.extend(["-map_chapters", "-1"])
	        else:
	            cmd.extend(["-map_chapters", "0"])

	        # collect all '-map' and codecs needed for output video based on input video
	        if not settings.video_disable:
	            for s in self.streams["video"]:
	                cmd.extend(["-map", f"0:{s}"])
	            # set codec (copy by default)
	            cmd.extend(["-c:v", settings.video_codec])

	        # ... and map the output of the normalization filters
	        for ol in output_labels:
	            cmd.extend(["-map", ol])

	        # set audio codec (never copy)
	        if settings.audio_codec:
	            cmd.extend(["-c:a", settings.audio_codec])
	        else:
	            for index, audio_stream in enumerate(self.streams["audio"].values()):
	                cmd.extend([f"-c:a:{index}", audio_stream.get_pcm_codec()])

	        # other audio options (if any)
	        if settings.audio_bitrate:
	            cmd.extend(["-b:a", str(settings.audio_bitrate)])
	        if settings.sample_rate:
	            cmd.extend(["-ar", str(settings.sample_rate)])
	        elif settings.normalization_type == "ebu":
	            # Logger.warn is a deprecated alias; use warning() like the
	            # rest of this class does.
	            logger.warning(
	                "The sample rate will automatically be set to 192 kHz by the loudnorm filter. "
	                "Specify -ar/--sample-rate to override it."
	            )

	        # ... and subtitles
	        if not settings.subtitle_disable:
	            for s in self.streams["subtitle"]:
	                cmd.extend(["-map", f"0:{s}"])
	            # copy subtitles
	            cmd.extend(["-c:s", "copy"])

	        if settings.keep_original_audio:
	            # the copied originals are placed after the normalized streams
	            highest_index = len(self.streams["audio"])
	            for index in range(highest_index):
	                cmd.extend(["-map", f"0:a:{index}"])
	                cmd.extend([f"-c:a:{highest_index + index}", "copy"])

	        # extra options (if any)
	        if settings.extra_output_options:
	            cmd.extend(settings.extra_output_options)

	        # output format (if any)
	        if settings.output_format:
	            cmd.extend(["-f", settings.output_format])

	        # if dry run, only show sample command
	        if settings.dry_run:
	            cmd.append(self.output_file)
	            cmd_runner = CommandRunner(cmd, dry=True)
	            cmd_runner.run_command()
	            yield 100
	            return

	        # Create a temporary output file inside a freshly created private
	        # directory. This avoids the private tempfile._get_candidate_names()
	        # helper and a guessable file name in the shared temp directory.
	        output_file_suffix = os.path.splitext(self.output_file)[1]
	        temp_dir = tempfile.mkdtemp()
	        temp_file_name = os.path.join(temp_dir, "out" + output_file_suffix)
	        cmd.append(temp_file_name)

	        # run the actual command
	        try:
	            cmd_runner = CommandRunner(cmd)
	            try:
	                yield from cmd_runner.run_ffmpeg_command()
	            except Exception as e:
	                quoted_cmd = " ".join([shlex.quote(c) for c in cmd])
	                logger.error(
	                    f"Error while running command {quoted_cmd}! Error: {e}"
	                )
	                raise
	            else:
	                # move file from TMP to output file
	                logger.debug(
	                    f"Moving temporary file from {temp_file_name} to {self.output_file}"
	                )
	                shutil.move(temp_file_name, self.output_file)
	        finally:
	            # Remove the temporary directory and any dangling file in it,
	            # also when the generator is closed before it is exhausted.
	            shutil.rmtree(temp_dir, ignore_errors=True)

	        logger.debug("Normalization finished")