Diego-0121 committed on
Commit
77d2ee7
1 Parent(s): a92109f

Create data_processing.py

Browse files
Files changed (1) hide show
  1. data_processing.py +43 -0
data_processing.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import sys
4
+ import codecs
5
+ #-------------------Load_data, function that loads the Spotify Dataset 1921-2020, 600k+--------------------------
6
+ #-------------------Tracks and checks with an error check if the data has been loaded correctly.----------------
7
+
8
def load_data(path):
    """Read the CSV file at ``path`` into a pandas DataFrame.

    Failures are reported on standard output instead of being raised, so the
    caller must check the result for ``None`` before using it.

    Args:
        path: Location of the CSV file; passed unchanged to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or ``None`` if the file does not exist or any
        other error occurs while reading it.
    """
    # Keep the try body limited to the one call that can fail.
    try:
        df = pd.read_csv(path)
    except FileNotFoundError:
        print(f"The document is not found in the directory: {path}")
        return None
    except Exception as e:
        # Deliberately broad: loading is best-effort and every failure
        # (parse error, empty file, permissions, ...) maps to ``None``.
        print(f"An error occurred loading the file: {e}")
        return None
    return df
18
# Script section: load the lyrics CSV and clean it in place.
# NOTE: the path is a machine-specific absolute location, kept exactly as in
# the original script.
path = 'C:\\Users\\34640\\Desktop\\Saturdays.ai\\spotify_dset\\spotify_millsongdata.csv\\spotify_millsongdata.csv'
spotify_data = load_data(path)

if spotify_data is not None:
    # Rename the columns only once the load is known to have succeeded;
    # doing it before this check raises AttributeError on a failed load and
    # makes the "No spotify data" branch unreachable.
    spotify_data.columns = ['artist', 'song', 'link', 'text']
    print("-----------Suscessfully loaded-------------")

    for col in spotify_data.columns:
        # -----------Fill up white space-----------#
        # Replace missing values with the column's most frequent value.
        # mode() is empty when the column has no non-null values (e.g. a
        # header-only CSV), in which case there is nothing to fill with.
        modes = spotify_data[col].mode()
        if not modes.empty:
            spotify_data[col] = spotify_data[col].fillna(modes.iloc[0])

        # -----------Convert to lower case and delete special characters-----------#
        # NOTE(review): the indentation of this statement was lost in the
        # diff view; it is assumed to belong to the loop because it uses
        # `col` -- confirm against the original file.
        # Raw string avoids the invalid-escape warning for \w and \s.
        spotify_data[col] = spotify_data[col].str.lower().str.replace(r'[^\w\s]', '', regex=True)

    # -----------Delete duplicates-----------#
    spotify_data = spotify_data.drop_duplicates()
else:
    print("No spotify data")