tebakaja's picture
[ update ]: remove asyncio features
ea238c4
raw
history blame
1.33 kB
import os
import numpy as np
from warnings import filterwarnings
filterwarnings('ignore')
""" Get Datasets """
def get_datasets(str datasets_path):
cdef list items = os.listdir(datasets_path)
cdef list csv_files = []
cdef str item
for item in items:
if os.path.isfile(os.path.join(datasets_path, item)) and item.endswith('.csv'):
csv_files.append(item)
return sorted(csv_files)
""" Create Sequences """
def create_sequences(df, int sequence_length):
cdef list labels = []
cdef list sequences = []
cdef int i
for i in range(len(df) - sequence_length):
seq = df.iloc[i:i + sequence_length].values
label = df.iloc[i + sequence_length].values[0]
sequences.append(seq)
labels.append(label)
return np.array(sequences), np.array(labels)
""" Pre-Process Data """
def preprocess_data(dataframe):
cdef str col
for col in dataframe.columns:
if dataframe[col].isnull().any():
if dataframe[col].dtype == 'object':
dataframe[col].fillna(dataframe[col].mode()[0], inplace=True)
else:
dataframe[col].fillna(dataframe[col].mean(), inplace=True)
return dataframe
""" Scale Data """
def scale_data(dataframe, scaler_cls):
scaler = scaler_cls()
dataframe['Close'] = scaler.fit_transform(dataframe[['Close']])
return scaler, dataframe