Spaces:

ccolas
/

TastyPiano

Runtime error

TastyPiano / src /music /pipeline /url2audio.py

Cédric Colas

initial commit

e775f6d over 2 years ago

5.24 kB

	import os
	import subprocess

	from pytube import YouTube

	from src.music.config import MAX_LEN
	from src.music.utils import RATE_AUDIO_SAVE, slugify

	# define filtering keyworfds
	start_keywords = [' ', '(', ',', ':']
	end_keywords = [')', ' ', '.', ',', '!', ':']
	def get_all_keywords(k):
	    """Return `k` wrapped in every (start, end) delimiter combination.

	    Each element is `start + k + end`, with `start` taken from
	    `start_keywords` and `end` from `end_keywords`. The start delimiter
	    varies slowest: all end delimiters are used with one start delimiter
	    before moving on to the next.
	    """
	    return [prefix + k + suffix
	            for prefix in start_keywords
	            for suffix in end_keywords]
	# Substrings looked up in the lower-cased video title by
	# should_be_filtered(); a hit marks the video for filtering.
	# 'live' and 'trio' are only matched when surrounded by delimiters
	# (see get_all_keywords), the other entries match anywhere in the title.
	# 'sextet' and 'contrebasse' are the correct spellings of the historical
	# entries 'sixtet' and 'contrebrasse'; the misspelled entries are kept so
	# that anything matched before is still matched.
	filtered_keywords = [
	    'duet', 'duo', 'quartet', 'orchestre', 'orchestra',
	    'quintet', 'sixtet', 'sextet', 'septet', 'octet',
	    'backing track', 'accompaniment', 'string',
	    'contrebrasse', 'contrebasse', 'drums', 'guitar',
	] + get_all_keywords('live') + get_all_keywords('trio')

	# Playlists and channels for which no keyword filtering should occur: they
	# were pre-filtered already and are supposed to contain only piano.
	# Only used for membership tests, so each URL is listed once (the original
	# list contained "https://www.youtube.com/c/PianoNotion" twice).
	playlist_and_channel_not_to_filter = [
	    "https://www.youtube.com/c/MySheetMusicTranscriptions",
	    "https://www.youtube.com/c/PianoNotion",
	    "https://www.youtube.com/watch?v=3F5glYefwio&list=PLFv3ZQw-ZPxi2DH3Bau7lBC5K6zfPJZxc",
	    "https://www.youtube.com/user/Mercuziopianist",
	    "https://www.youtube.com/channel/UCy6NPK6-xeX7MZLaMARa5qg",
	    "https://www.youtube.com/channel/UCKMRNFV2dWTWIJnymtA9_Iw",
	    "https://www.youtube.com/c/pianomaedaful",
	    "https://www.youtube.com/c/FrancescoParrinoMusic",
	    "https://www.youtube.com/c/itsremco",
	]
	# A video url contained in this string is exempt from keyword filtering
	# (should_be_filtered() tests `url in playlist_ok`).
	playlist_ok = "https://www.youtube.com/watch?v=sYv_vk6bJtk&list=PLO9E3V4rGLD9-0BEd3t-AvvMcVF1zOJPj"


	def should_be_filtered(title, length, url, playlist_url, max_length):
	    """Decide whether a video must be skipped, and explain why.

	    Args:
	        title: video title; matching is done on its lower-cased form.
	        length: video duration, compared against `max_length` (the reason
	            message reports `max_length / 60` as minutes).
	        url: url of the video, or None when it is not known.
	        playlist_url: url of the playlist/channel the video comes from.
	        max_length: longest accepted duration, same unit as `length`.

	    Returns:
	        A `(to_filter, reason)` tuple. `reason` is the concatenation of one
	        message per violated rule and is '' when `to_filter` is False.
	    """
	    reasons = []
	    if length > max_length:
	        reasons.append(f'it is too long (>{max_length/60:.1f} min), ')

	    lower_title = title.lower()
	    # The delimited keywords (e.g. ' live ', '(trio)') need a character on
	    # both sides; padding lets them match at the very start or end of the
	    # title as well (e.g. "Bill Evans Trio").
	    padded_title = f' {lower_title} '
	    has_keyword = any(keyword in padded_title for keyword in filtered_keywords)

	    # NOTE(review): `playlist_ok` is a string, so `in` is a substring test;
	    # that behaviour is kept. A missing url (None) used to raise TypeError
	    # here and is now simply treated as "not exempt".
	    url_is_exempt = isinstance(url, str) and url in playlist_ok
	    # NOTE(review): 'to live' / 'alive' look like guards against false
	    # positives of 'live', but they disable the whole keyword filter, as in
	    # the original code.
	    exempt = (playlist_url in playlist_and_channel_not_to_filter
	              or 'to live' in lower_title
	              or 'alive' in lower_title
	              or url_is_exempt)
	    if has_keyword and not exempt:
	        reasons.append('it contains a filtered keyword, ')

	    return bool(reasons), ''.join(reasons)

	def convert_mp4_to_mp3(path, verbose=True):
	    """Convert the mp4 file at `path` to an mp3 and delete the mp4.

	    The mp3 is written next to the source, with the '.mp4' suffix replaced
	    by '.mp3'. ffmpeg is asked for one audio channel (`-ac 1`) and a sample
	    rate of `RATE_AUDIO_SAVE` (`-ar`), overwriting any existing output
	    (`-y`).

	    Args:
	        path: path of the source file; must end with '.mp4'.
	        verbose: print progress messages when True.

	    Raises:
	        ValueError: if `path` does not end with '.mp4'.
	        subprocess.CalledProcessError: if ffmpeg exits with a non-zero
	            status (the original code ignored the exit status).
	        FileNotFoundError: if the ffmpeg executable cannot be found.
	    """
	    if not path.endswith('.mp4'):
	        raise ValueError(f'expected a path ending with .mp4, got {path!r}')
	    if verbose: print(f"Converting mp4 to mp3, in {path}\n")
	    mp3_path = path[:-4] + '.mp3'
	    # An argument list (no shell) keeps paths containing quotes, spaces or
	    # shell metacharacters from breaking, or being interpreted by, a shell.
	    command = ['ffmpeg', '-i', path, '-loglevel', 'panic', '-y',
	               '-ac', '1', '-ar', str(int(RATE_AUDIO_SAVE)), mp3_path]
	    try:
	        subprocess.run(command, check=True)
	    finally:
	        # As before, the source is removed whether or not conversion worked.
	        if os.path.exists(path):
	            os.remove(path)
	    if verbose: print('\tDone.')

	def pipeline_video(video, playlist_path, filename):
	    """Download the audio of `video` and convert it to mp3.

	    The stream chosen by extract_best_stream() is downloaded as
	    `<filename>.mp4` into `playlist_path`, then handed to
	    convert_mp4_to_mp3(), which replaces it with `<filename>.mp3`.

	    Args:
	        video: object exposing a `streams` attribute (a pytube `YouTube`
	            instance in url2audio()).
	        playlist_path: directory receiving the file.
	        filename: file name without extension.

	    Returns:
	        The bitrate reported by extract_best_stream().
	    """
	    # extract best stream for this video
	    stream, kbps = extract_best_stream(video.streams)
	    mp4_name = filename + '.mp4'
	    stream.download(output_path=playlist_path, filename=mp4_name)
	    # Join directory and file name instead of concatenating them, so the
	    # path is still right when `playlist_path` has no trailing separator
	    # (pytube is expected to join output_path and filename the same way).
	    convert_mp4_to_mp3(os.path.join(playlist_path, mp4_name), verbose=False)
	    return kbps

	def extract_best_stream(streams):
	    """Return the audio-only stream of `streams` together with its bitrate.

	    Args:
	        streams: object providing `get_audio_only()`.

	    Returns:
	        A `(stream, kbps)` tuple, where `kbps` is the stream's `abr`
	        attribute with its last four characters removed, as an int.
	    """
	    audio = streams.get_audio_only()
	    # `abr` presumably looks like '128kbps'; drop the 4-character unit.
	    bitrate = int(audio.abr[:-len('kbps')])
	    return audio, bitrate

	def get_title_and_length(video):
	    """Gather the descriptive fields of `video`.

	    Returns:
	        A `(title, filename, length, metadata)` tuple, where `filename` is
	        `slugify(title)` and the other items are read from the `title`,
	        `length` and `metadata` attributes of `video`.
	    """
	    title = video.title
	    return title, slugify(title), video.length, video.metadata


	def url2audio(playlist_path, video_url=None, video=None, playlist_url='', apply_filters=False, verbose=False, level=0):
	    """Download one YouTube video as an mp3 file inside `playlist_path`.

	    Args:
	        playlist_path: directory receiving `<slugified title>.mp3`.
	        video_url: url of the video; used to build the `YouTube` object
	            when `video` is not given.
	        video: already-built video object; takes precedence over
	            `video_url` when truthy.
	        playlist_url: url of the source playlist/channel, forwarded to
	            should_be_filtered().
	        apply_filters: when True, skip videos rejected by
	            should_be_filtered() (called with `MAX_LEN`).
	        verbose: print progress messages when True.
	        level: indentation (number of spaces) of the printed messages.

	    Returns:
	        A `(audio_path, data, error)` tuple:
	        * download done: `(path, dict, '')`, the dict holding title,
	          filename, length, kbps, url and meta;
	        * file already present: `(path, None, '')`;
	        * filtered out: `(None, None, 'Filtered because ...')`;
	        * failure: `(None, None, message)`, the message listing the steps
	          that were reached and ending with ' Yes.' after the failing one.

	    Raises:
	        ValueError: if neither `video_url` nor `video` is provided.
	    """
	    if video_url is None and video is None:
	        raise ValueError('needs either video or url')
	    # `error_msg` is extended before each step, so that on failure it names
	    # the step that was running.
	    error_msg = 'Error in loading video?'
	    # Bound before the try block: the handler below must be able to test it
	    # even when the failure happens before the output path is known.
	    audio_path = None
	    try:
	        if not video:
	            video = YouTube(video_url)
	        error_msg += ' Nope. In extracting title and length?'
	        title, filename, length, video_meta_data = get_title_and_length(video)
	        if apply_filters:
	            to_filter, reason = should_be_filtered(title, length, video_url, playlist_url, MAX_LEN)
	            if to_filter:
	                return None, None, f'Filtered because {reason}'
	        audio_path = os.path.join(playlist_path, filename + ".mp3")
	        if verbose: print(' ' * level + f'Downloading {title}, Url: {video_url}')
	        if os.path.exists(audio_path):
	            if verbose: print(' ' * (level + 2) + 'Song already downloaded')
	            return audio_path, None, ''
	        if length > MAX_LEN and verbose: print(' ' * (level + 2) + f'Long video ({int(length/60)} min), will be cut after {int(MAX_LEN/60)} min.')
	        error_msg += ' Nope. In pipeline video?'
	        kbps = pipeline_video(video, playlist_path, filename)
	        error_msg += ' Nope. In dict filling?'
	        data = dict(title=title, filename=filename, length=length, kbps=kbps, url=video_url, meta=video_meta_data)
	        error_msg += ' Nope. '
	        return audio_path, data, ''
	    except Exception:
	        # Best effort: any failure is reported through the returned message.
	        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
	        if verbose: print(' ' * (level + 2) + f'Download failed with error {error_msg}')
	        if audio_path is not None and os.path.exists(audio_path):
	            os.remove(audio_path)
	        return None, None, error_msg + ' Yes.'