Spaces:
Runtime error
Runtime error
import os | |
from pytube import YouTube | |
from src.music.utils import RATE_AUDIO_SAVE, slugify | |
from src.music.config import MAX_LEN | |
# Delimiters used to build the filtering keywords (see get_all_keywords).
# Characters that may directly precede a keyword.
start_keywords = [
    ' ',
    '(',
    ',',
    ':',
]
# Characters that may directly follow a keyword.
end_keywords = [
    ')',
    ' ',
    '.',
    ',',
    '!',
    ':',
]
def get_all_keywords(k):
    """Return every variant of keyword ``k`` wrapped in a start and an end delimiter.

    Each entry of ``start_keywords`` is combined with each entry of
    ``end_keywords``; the result is ordered by start delimiter first, then by
    end delimiter.
    """
    return [prefix + k + suffix
            for prefix in start_keywords
            for suffix in end_keywords]
# Substrings looked up in the lower-cased video title by should_be_filtered;
# a match marks the video for filtering.
# 'live' and 'trio' are only matched when wrapped in delimiters (see
# get_all_keywords); the other keywords match anywhere in the title.
filtered_keywords = [
    'duet', 'duo', 'quartet', 'orchestre', 'orchestra',
    'quintet',
    # 'sixtet' is kept for backward compatibility; 'sextet' is the usual spelling.
    'sixtet', 'sextet',
    'septet', 'octet', 'backing track', 'accompaniment', 'string',
    # 'contrebrasse' is kept for backward compatibility; 'contrebasse' is the
    # correct French spelling (double bass).
    'contrebrasse', 'contrebasse',
    'drums', 'guitar',
] + get_all_keywords('live') + get_all_keywords('trio')
# Playlists and channels for which no keyword filtering should occur: they
# were pre-filtered already and are supposed to contain only piano.
# (The PianoNotion channel used to be listed twice; the duplicate was dropped,
# which does not change any membership test.)
playlist_and_channel_not_to_filter = [
    "https://www.youtube.com/c/MySheetMusicTranscriptions",
    "https://www.youtube.com/c/PianoNotion",
    "https://www.youtube.com/watch?v=3F5glYefwio&list=PLFv3ZQw-ZPxi2DH3Bau7lBC5K6zfPJZxc",
    "https://www.youtube.com/user/Mercuziopianist",
    "https://www.youtube.com/channel/UCy6NPK6-xeX7MZLaMARa5qg",
    "https://www.youtube.com/channel/UCKMRNFV2dWTWIJnymtA9_Iw",
    "https://www.youtube.com/c/pianomaedaful",
    "https://www.youtube.com/c/FrancescoParrinoMusic",
    "https://www.youtube.com/c/itsremco",
]
# Single URL (a string, not a list) that should_be_filtered compares the
# video url against to exempt it from keyword filtering.
playlist_ok = "https://www.youtube.com/watch?v=sYv_vk6bJtk&list=PLO9E3V4rGLD9-0BEd3t-AvvMcVF1zOJPj"
def should_be_filtered(title, length, url, playlist_url, max_length):
    """Decide whether a video must be skipped and explain why.

    Args:
        title: video title; keywords are searched in its lower-cased form.
        length: video duration, compared against ``max_length`` (the reason
            message reports ``max_length / 60`` as minutes).
        url: url of the video, or None when it is not known.
        playlist_url: url of the playlist/channel the video comes from; no
            keyword filtering is applied when it is listed in
            ``playlist_and_channel_not_to_filter``.
        max_length: maximum accepted ``length``.

    Returns:
        ``(to_filter, reason)``: a boolean and a string concatenating one
        ``'..., '`` fragment per triggered criterion (empty when none is).
    """
    to_filter = False
    reason = ''
    lower_title = title.lower()

    if length > max_length:
        reason += f'it is too long (>{max_length/60:.1f} min), '
        to_filter = True

    has_filtered_keyword = any(keyword in lower_title for keyword in filtered_keywords)
    # `url` is None when the caller only has a video object; `None in <str>`
    # raises TypeError, so an unknown url is treated as "not exempted".
    # NOTE(review): playlist_ok is a single string, so `url not in playlist_ok`
    # is a substring test, not a list membership test - confirm this is intended.
    if has_filtered_keyword \
            and playlist_url not in playlist_and_channel_not_to_filter \
            and 'to live' not in lower_title and 'alive' not in lower_title \
            and (url is None or url not in playlist_ok):
        reason += 'it contains a filtered keyword, '
        to_filter = True

    return to_filter, reason
def convert_mp4_to_mp3(path, verbose=True):
    """Convert the mp4 file at ``path`` to an mp3 next to it, then delete the mp4.

    ffmpeg is invoked with ``-ac 1`` and ``-ar int(RATE_AUDIO_SAVE)``; the
    output file has the same name with the ``.mp4`` suffix replaced by
    ``.mp3`` and is overwritten if it exists (``-y``).

    Args:
        path: path of the source file; must end with ``.mp4``.
        verbose: print progress messages when True.

    Raises:
        AssertionError: if ``path`` does not end with ``.mp4``.
        FileNotFoundError: if the ffmpeg executable cannot be found, or if
            ``path`` does not exist when it is removed.
    """
    # Local import so this function stays self-contained.
    import subprocess

    if verbose: print(f"Converting mp4 to mp3, in {path}\n")
    assert path[-4:] == '.mp4'
    mp3_path = path[:-4] + '.mp3'
    # Pass the arguments as a list (no shell) so that quotes, spaces or shell
    # metacharacters in the path cannot break or hijack the command.
    # The exit status is deliberately not checked, as before.
    subprocess.run(['ffmpeg', '-i', path, '-loglevel', 'panic', '-y',
                    '-ac', '1', '-ar', str(int(RATE_AUDIO_SAVE)), mp3_path])
    os.remove(path)
    if verbose: print('\tDone.')
def pipeline_video(video, playlist_path, filename):
    """Download the audio of ``video`` as mp4, convert it to mp3, return its kbps.

    The stream is chosen by ``extract_best_stream``, saved as
    ``filename + '.mp4'`` under ``playlist_path`` and then handed to
    ``convert_mp4_to_mp3``.

    NOTE(review): the path given to the converter is built by plain string
    concatenation, so ``playlist_path`` presumably has to end with a path
    separator - confirm against callers.
    """
    # Pick the stream to download and remember its bitrate.
    best_stream, kbps = extract_best_stream(video.streams)
    mp4_name = filename + '.mp4'
    best_stream.download(output_path=playlist_path, filename=mp4_name)
    # Convert the downloaded file to mp3.
    convert_mp4_to_mp3(playlist_path + mp4_name, verbose=False)
    return kbps
def extract_best_stream(streams):
    """Return ``(stream, kbps)`` for the audio-only stream of ``streams``.

    ``kbps`` is the stream's ``abr`` attribute with its last four characters
    removed, converted to int (presumably a ``'<number>kbps'`` string).
    """
    audio_stream = streams.get_audio_only()
    bitrate_text = audio_stream.abr
    return audio_stream, int(bitrate_text[:-4])
def get_title_and_length(video):
    """Return ``(title, filename, length, metadata)`` for ``video``.

    ``filename`` is the slugified title; the other values are read directly
    from the ``title``, ``length`` and ``metadata`` attributes of ``video``.
    """
    video_title = video.title
    return video_title, slugify(video_title), video.length, video.metadata
def url2audio(playlist_path, video_url=None, video=None, playlist_url='', apply_filters=False, verbose=False, level=0):
    """Download one video as an mp3 file under ``playlist_path``.

    Args:
        playlist_path: prefix the mp3 file name is appended to.
            NOTE(review): plain string concatenation is used, so this
            presumably has to end with a path separator - confirm.
        video_url: url of the video; used to load it when ``video`` is not given.
        video: already loaded video object; takes precedence over ``video_url``.
        playlist_url: url of the source playlist, forwarded to ``should_be_filtered``.
        apply_filters: when True, skip videos rejected by ``should_be_filtered``.
        verbose: print progress messages when True.
        level: indentation level of the printed messages.

    Returns:
        ``(audio_path, data, error)``:
        - success: the mp3 path, a dict describing the download (None when
          the file already existed) and an empty string;
        - filtered: ``(None, None, 'Filtered because ...')``;
        - failure: ``(None, None, <message naming the step that failed>)``.

    Raises:
        AssertionError: if neither ``video_url`` nor ``video`` is provided.
    """
    assert video_url is not None or video is not None, 'needs either video or url'
    error_msg = 'Error in loading video?'
    # Defined before the try block so the error handler can safely inspect it
    # even when the failure happens before the path is computed.
    audio_path = None
    reason = ''
    try:
        if not video:
            video = YouTube(video_url)
        error_msg += ' Nope. In extracting title and length?'
        title, filename, length, video_meta_data = get_title_and_length(video)
        if apply_filters:
            to_filter, reason = should_be_filtered(title, length, video_url, playlist_url, MAX_LEN)
        else:
            to_filter = False
        if to_filter:
            return None, None, f'Filtered because {reason}'

        audio_path = playlist_path + filename + ".mp3"
        if verbose: print(' ' * level + f'Downloading {title}, Url: {video_url}')
        if not os.path.exists(audio_path):
            if length > MAX_LEN and verbose: print(' ' * (level + 2) + f'Long video ({int(length/60)} min), will be cut after {int(MAX_LEN/60)} min.')
            error_msg += ' Nope. In pipeline video?'
            kbps = pipeline_video(video, playlist_path, filename)
            error_msg += ' Nope. In dict filling?'
            data = dict(title=title, filename=filename, length=length, kbps=kbps, url=video_url, meta=video_meta_data)
            error_msg += ' Nope. '
        else:
            if verbose: print(' ' * (level + 2) + 'Song already downloaded')
            data = None
        return audio_path, data, ''
    except Exception:
        # `Exception` rather than a bare except: KeyboardInterrupt and
        # SystemExit must not be reported as a failed download.
        if verbose: print(' ' * (level + 2) + f'Download failed with error {error_msg}')
        # Remove a possibly incomplete mp3; audio_path is still None when the
        # failure happened before the path was computed.
        if audio_path is not None and os.path.exists(audio_path):
            os.remove(audio_path)
        return None, None, error_msg + ' Yes.'