import pandas as pd
import numpy as np
import sys
import codecs

# Column names assigned to the loaded dataset, in file order.
COLUMN_NAMES = ['artist', 'song', 'link', 'text']


def load_data(path) -> "pd.DataFrame | None":
    """Load a CSV file into a DataFrame, reporting failures on stdout.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or ``None`` when the file is missing or any
        other error occurs while reading it (a message is printed in both
        cases).
    """
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        print(f"The document is not found in the directory: {path}")
        return None
    except Exception as e:
        # Deliberate catch-all: this is the script's loading boundary, and
        # callers rely on getting None instead of an exception.
        print(f"An error occurred loading the file: {e}")
        return None


def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *df*; the input frame is not modified.

    For every column: missing values are replaced with the column's most
    frequent value, text is lower-cased, and every character that is neither
    a word character nor whitespace is removed. Duplicate rows are dropped
    afterwards.

    Every column is processed through the ``.str`` accessor, so columns are
    expected to hold strings.

    NOTE(review): the punctuation stripping is applied to every column, so
    characters such as '/' and '.' are removed from the 'link' column as
    well -- confirm this is intended.
    """
    cleaned = df.copy()
    for col in cleaned.columns:
        series = cleaned[col]
        modes = series.mode()
        # mode() is empty when the column has no non-null values (e.g. a
        # header-only CSV); indexing it unconditionally would raise.
        if not modes.empty:
            series = series.fillna(modes.iloc[0])
        # Raw string: '\w' and '\s' are invalid escapes in a normal literal.
        cleaned[col] = series.str.lower().str.replace(r'[^\w\s]', '', regex=True)
    return cleaned.drop_duplicates()


path = 'C:\\Users\\34640\\Desktop\\Saturdays.ai\\spotify_dset\\spotify_millsongdata.csv\\spotify_millsongdata.csv'
spotify_data = load_data(path)

if spotify_data is not None:
    # Rename only after confirming the load succeeded; doing it earlier
    # raises AttributeError on None and hides the "No spotify data" branch.
    spotify_data.columns = COLUMN_NAMES
    print("-----------Suscessfully loaded-------------")
    spotify_data = clean_data(spotify_data)
else:
    print("No spotify data")