# Repository page header (not code): Cédric Colas · initial commit · e775f6d · 5.24 kB
import os
from pytube import YouTube
from src.music.utils import RATE_AUDIO_SAVE, slugify
from src.music.config import MAX_LEN
# Define the filtering keywords.
# Delimiter characters allowed immediately before (start_keywords) and
# immediately after (end_keywords) a keyword; get_all_keywords combines them
# with a keyword to build its delimited variants.
start_keywords = [" ", "(", ",", ":"]
end_keywords = [")", " ", ".", ",", "!", ":"]
def get_all_keywords(k):
    """Return every delimited variant of the keyword ``k``.

    A variant is ``k`` prefixed by one entry of ``start_keywords`` and
    suffixed by one entry of ``end_keywords``. Variants are ordered by prefix
    first, then by suffix.
    """
    return [
        prefix + k + suffix
        for prefix in start_keywords
        for suffix in end_keywords
    ]
# A video is filtered out when its lower-cased title contains one of these
# substrings (see should_be_filtered). 'live' and 'trio' are only added in
# their delimited forms (get_all_keywords), presumably so that they do not
# match inside longer words.
# The original misspellings 'sixtet' and 'contrebrasse' are kept for backward
# compatibility; the correct spellings 'sextet' and 'contrebasse' are added so
# that real titles are actually matched.
filtered_keywords = [
    'duet', 'duo', 'quartet', 'orchestre', 'orchestra',
    'quintet', 'sixtet', 'sextet', 'septet', 'octet',
    'backing track', 'accompaniment', 'string',
    'contrebrasse', 'contrebasse', 'drums', 'guitar',
] + get_all_keywords('live') + get_all_keywords('trio')

# Playlists and channels for which no keyword filtering should occur: they
# were pre-filtered already and are supposed to contain only piano.
playlist_and_channel_not_to_filter = [
    "https://www.youtube.com/c/MySheetMusicTranscriptions",
    "https://www.youtube.com/c/PianoNotion",
    "https://www.youtube.com/watch?v=3F5glYefwio&list=PLFv3ZQw-ZPxi2DH3Bau7lBC5K6zfPJZxc",
    "https://www.youtube.com/user/Mercuziopianist",
    "https://www.youtube.com/channel/UCy6NPK6-xeX7MZLaMARa5qg",
    "https://www.youtube.com/channel/UCKMRNFV2dWTWIJnymtA9_Iw",
    "https://www.youtube.com/c/pianomaedaful",
    "https://www.youtube.com/c/FrancescoParrinoMusic",
    "https://www.youtube.com/c/itsremco",
]

# A video whose URL is contained in this string is exempt from keyword
# filtering (see should_be_filtered).
playlist_ok = "https://www.youtube.com/watch?v=sYv_vk6bJtk&list=PLO9E3V4rGLD9-0BEd3t-AvvMcVF1zOJPj"
def should_be_filtered(title, length, url, playlist_url, max_length):
    """Decide whether a video must be skipped, and explain why.

    Args:
        title: video title; keywords are matched against its lower-cased form.
        length: video duration in seconds.
        url: URL of the video, or None / '' when it is unknown.
        playlist_url: URL of the playlist or channel the video comes from.
        max_length: maximum accepted duration in seconds.

    Returns:
        A ``(to_filter, reason)`` tuple. ``reason`` is '' when nothing
        applies; otherwise it concatenates one fragment per triggered rule,
        each ending with ', '.
    """
    to_filter = False
    reason = ''
    lower_title = title.lower()

    if length > max_length:
        reason += f'it is too long (>{max_length/60:.1f} min), '
        to_filter = True

    # Keyword rule. It is skipped for pre-filtered playlists/channels, for
    # titles containing 'to live' or 'alive', and for URLs contained in
    # playlist_ok.
    # `url in playlist_ok` is a substring test on a string: it raises
    # TypeError for None and is always True for '', so an unknown URL is
    # explicitly treated as "not whitelisted" instead.
    if any(keyword in lower_title for keyword in filtered_keywords) \
            and playlist_url not in playlist_and_channel_not_to_filter \
            and 'to live' not in lower_title and 'alive' not in lower_title \
            and (not url or url not in playlist_ok):
        reason += 'it contains a filtered keyword, '
        to_filter = True

    return to_filter, reason
def convert_mp4_to_mp3(path, verbose=True):
    """Convert the mp4 file at ``path`` to a mono mp3 and delete the mp4.

    The mp3 is written next to the source file, with the same name and the
    '.mp3' extension, resampled to ``RATE_AUDIO_SAVE``. The source mp4 is
    removed whether or not ffmpeg succeeds (unchanged from the previous
    behaviour); a failed conversion does not raise.

    Args:
        path: path of the mp4 file; must end with '.mp4'.
        verbose: when True, print progress messages.

    Raises:
        AssertionError: if ``path`` does not end with '.mp4'.
    """
    # Local import so that this function does not depend on the module's
    # import block.
    import subprocess

    if verbose:
        print(f"Converting mp4 to mp3, in {path}\n")
    # Raised explicitly (rather than with `assert`) so that the check
    # survives `python -O`; the exception type is kept for existing callers.
    if path[-4:] != '.mp4':
        raise AssertionError(f'expected a path ending with .mp4, got {path!r}')

    mp3_path = path[:-4] + ".mp3"
    # Argument list without a shell: paths containing quotes, `$`, backticks
    # or spaces are passed to ffmpeg verbatim instead of being interpreted.
    command = [
        'ffmpeg', '-i', path, '-loglevel', 'panic', '-y',
        '-ac', '1', '-ar', str(int(RATE_AUDIO_SAVE)), mp3_path,
    ]
    try:
        succeeded = subprocess.run(command).returncode == 0
    except OSError:
        # ffmpeg missing or not executable: treated like a failed conversion,
        # as the shell-based version did.
        succeeded = False

    os.remove(path)
    if verbose:
        print('\tDone.' if succeeded else f'\tConversion failed for {path}.')
def pipeline_video(video, playlist_path, filename):
    """Download the audio of ``video`` and store it as an mp3.

    The stream selected by ``extract_best_stream`` is saved as
    ``filename + '.mp4'`` under ``playlist_path`` and then handed to
    ``convert_mp4_to_mp3``. ``playlist_path`` is concatenated directly with
    the file name, so it is expected to end with a path separator.

    Returns:
        The bitrate (kbps) reported by ``extract_best_stream``.
    """
    mp4_name = filename + '.mp4'

    # Pick the stream to download, then fetch it.
    best_stream, bitrate = extract_best_stream(video.streams)
    best_stream.download(output_path=playlist_path, filename=mp4_name)

    # Turn the downloaded mp4 into an mp3.
    convert_mp4_to_mp3(playlist_path + mp4_name, verbose=False)
    return bitrate
def extract_best_stream(streams):
    """Select the audio-only stream and read its bitrate.

    Args:
        streams: object exposing ``get_audio_only()`` (here, the ``streams``
            attribute of a video).

    Returns:
        A ``(stream, kbps)`` tuple, where ``kbps`` is the stream's ``abr``
        string with its last four characters removed, converted to int.
    """
    audio_stream = streams.get_audio_only()
    # abr is expected to look like '128kbps'; strip the 4-character unit.
    bitrate_digits = audio_stream.abr[:-4]
    return audio_stream, int(bitrate_digits)
def get_title_and_length(video):
    """Gather the descriptive fields of ``video``.

    Returns:
        A ``(title, filename, length, metadata)`` tuple, where ``filename``
        is ``slugify(title)`` and the other items are read from the
        attributes of the same name on ``video``.
    """
    raw_title = video.title
    return raw_title, slugify(raw_title), video.length, video.metadata
def url2audio(playlist_path, video_url=None, video=None, playlist_url='', apply_filters=False, verbose=False, level=0):
    """Download one YouTube video as an mp3 file under ``playlist_path``.

    Args:
        playlist_path: destination directory; it is concatenated directly
            with the file name, so it is expected to end with a separator.
        video_url: URL of the video; used to build the video object when
            ``video`` is not given.
        video: already-built video object (takes precedence over the URL).
        playlist_url: URL of the originating playlist/channel, forwarded to
            ``should_be_filtered``.
        apply_filters: when True, skip videos rejected by
            ``should_be_filtered`` (using ``MAX_LEN`` as maximum length).
        verbose: when True, print progress messages.
        level: indentation (in spaces) of the printed messages.

    Returns:
        A ``(audio_path, data, message)`` tuple:
        * downloaded: ``(path, info_dict, '')``;
        * already on disk: ``(path, None, '')``;
        * filtered out: ``(None, None, 'Filtered because ...')``;
        * failure: ``(None, None, error_message)`` where the message lists
          the stages that were passed and ends with ' Yes.' after the stage
          that failed.

    Raises:
        AssertionError: if neither ``video_url`` nor ``video`` is given.
    """
    assert video_url is not None or video is not None, 'needs either video or url'
    error_msg = 'Error in loading video?'
    # Bound before the try block: the failure handler below inspects it, and
    # most failures (loading the video, reading its title, filtering) happen
    # before the real path is known.
    audio_path = None
    try:
        if video is None:
            video = YouTube(video_url)
        error_msg += ' Nope. In extracting title and length?'
        title, filename, length, video_meta_data = get_title_and_length(video)

        if apply_filters:
            to_filter, reason = should_be_filtered(title, length, video_url, playlist_url, MAX_LEN)
            if to_filter:
                return None, None, f'Filtered because {reason}'

        audio_path = playlist_path + filename + ".mp3"
        if verbose:
            print(' ' * level + f'Downloading {title}, Url: {video_url}')

        if os.path.exists(audio_path):
            if verbose:
                print(' ' * (level + 2) + 'Song already downloaded')
            return audio_path, None, ''

        if length > MAX_LEN and verbose:
            print(' ' * (level + 2) + f'Long video ({int(length/60)} min), will be cut after {int(MAX_LEN/60)} min.')
        error_msg += ' Nope. In pipeline video?'
        kbps = pipeline_video(video, playlist_path, filename)
        error_msg += ' Nope. In dict filling?'
        data = dict(title=title, filename=filename, length=length, kbps=kbps, url=video_url, meta=video_meta_data)
        error_msg += ' Nope. '
        return audio_path, data, ''
    except Exception:
        # `Exception` rather than a bare except, so that Ctrl-C and
        # SystemExit still interrupt a long batch of downloads.
        if verbose:
            print(' ' * (level + 2) + f'Download failed with error {error_msg}')
        # Remove a possibly incomplete mp3 left behind by the failed attempt.
        if audio_path is not None and os.path.exists(audio_path):
            os.remove(audio_path)
        return None, None, error_msg + ' Yes.'