Spaces:

Saturdays
/

Harmonize

Sleeping

Harmonize / data_processing.py

Create data_processing.py

77d2ee7 11 months ago

1.44 kB

	import pandas as pd
	import numpy as np
	import sys
	import codecs
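	#------------------- load_data: loads the Spotify dataset ("Spotify Dataset 1921-2020, 600k+ Tracks") from a CSV file ----
	#------------------- and reports any error raised while loading instead of propagating it. -------------------------------
	# NOTE(review): the script further down reads spotify_millsongdata.csv -- confirm which dataset is actually intended.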

	def load_data(path):
	    """Read the CSV file at *path* into a pandas DataFrame.

	    Loading is best-effort: failures are reported on stdout rather than
	    raised, so callers must check the result for ``None`` before using it.

	    Args:
	        path: Location of the CSV file, passed straight to ``pd.read_csv``.

	    Returns:
	        The loaded DataFrame, or ``None`` when the file does not exist or
	        any other error occurs while reading it.
	    """
	    frame = None
	    try:
	        frame = pd.read_csv(path)
	    except FileNotFoundError:
	        print(f"The document is not found in the directory: {path}")
	    except Exception as err:
	        # Deliberate catch-all: any read/parse problem is reported and
	        # turned into a ``None`` result instead of aborting the caller.
	        print(f"An error occurred loading the file: {err}")
	    return frame
	# Location of the lyrics CSV to load.
	# NOTE(review): hard-coded absolute Windows path -- this only resolves on
	# the original author's machine; consider making it configurable.
	path = 'C:\\Users\\34640\\Desktop\\Saturdays.ai\\spotify_dset\\spotify_millsongdata.csv\\spotify_millsongdata.csv'
	spotify_data = load_data(path)

	if spotify_data is not None:
	    # Rename the columns only after a successful load: load_data returns
	    # None on failure, and assigning .columns on None would raise
	    # AttributeError before the "No spotify data" branch could ever run.
	    spotify_data.columns = ['artist', 'song', 'link', 'text']
	    print("-----------Suscessfully loaded-------------")

	    for col in spotify_data.columns:
	        # Fill missing values with the column's most frequent value.
	        spotify_data[col] = spotify_data[col].fillna(spotify_data[col].mode()[0])

	        # Lower-case the text and strip everything that is not a word
	        # character or whitespace. Raw string: the pattern is unchanged,
	        # but "\w" / "\s" are no longer invalid escapes in a normal literal.
	        # NOTE(review): this is applied to every column, including 'link' --
	        # confirm that stripping punctuation from links is intended.
	        spotify_data[col] = spotify_data[col].str.lower().str.replace(r'[^\w\s]', '', regex=True)

	    # Drop rows that are exact duplicates after normalization.
	    spotify_data = spotify_data.drop_duplicates()
	else:
	    print("No spotify data")