Spaces:
Runtime error
Runtime error
Added supplementary files
Browse files- src/__init__.py +0 -0
- src/utils.py +52 -0
src/__init__.py
ADDED
File without changes
|
src/utils.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
|
5 |
+
|
6 |
+
|
7 |
+
def payday(row):
    """Flag whether a row falls on a payday.

    A row counts as a payday when its ``DayOfMonth`` equals 15 or its
    ``Is_month_end`` flag equals 1.

    Args:
        row: Any object exposing ``DayOfMonth`` and ``Is_month_end``
            attributes, e.g. a row handed over by
            ``DataFrame.apply(..., axis=1)``.

    Returns:
        int: 1 if the row is a payday, otherwise 0.
    """
    is_payday = row.DayOfMonth == 15 or row.Is_month_end == 1
    return 1 if is_payday else 0
|
12 |
+
|
13 |
+
|
14 |
+
def date_extracts(data):
    """Add calendar feature columns derived from the index of ``data``.

    The frame is modified in place. Its index must expose the pandas
    datetime attributes used below (``year``, ``month``, ``isocalendar()``,
    ``is_month_end``, ...), i.e. it is expected to be a ``DatetimeIndex``.

    Columns written: ``Year``, ``Month``, ``DayOfMonth``, ``DaysInMonth``,
    ``DayOfYear``, ``DayOfWeek``, ``Week``, ``Is_weekend``,
    ``Is_month_start``, ``Is_month_end``, ``Quarter``, ``Is_quarter_start``,
    ``Is_quarter_end``, ``Is_year_start`` and ``Is_year_end``. The ``Is_*``
    columns hold 0/1 integers.

    Args:
        data: DataFrame whose index carries the dates.

    Returns:
        The same DataFrame object that was passed in, so the call can be
        chained. Callers that ignore the return value keep working.
    """
    idx = data.index

    data['Year'] = idx.year
    data['Month'] = idx.month
    data['DayOfMonth'] = idx.day
    data['DaysInMonth'] = idx.days_in_month
    data['DayOfYear'] = idx.day_of_year
    data['DayOfWeek'] = idx.dayofweek
    data['Week'] = idx.isocalendar().week
    # pandas numbers Monday as 0, so values above 4 are Saturday and Sunday.
    data['Is_weekend'] = np.where(data['DayOfWeek'] > 4, 1, 0)
    data['Is_month_start'] = idx.is_month_start.astype(int)
    data['Is_month_end'] = idx.is_month_end.astype(int)
    data['Quarter'] = idx.quarter
    data['Is_quarter_start'] = idx.is_quarter_start.astype(int)
    data['Is_quarter_end'] = idx.is_quarter_end.astype(int)
    data['Is_year_start'] = idx.is_year_start.astype(int)
    data['Is_year_end'] = idx.is_year_end.astype(int)

    return data
|
30 |
+
|
31 |
+
|
32 |
+
|
33 |
+
|
34 |
+
# the function creates a dataframe from the inputs
|
35 |
+
def create_dataframe(arr):
    """Build a one-row DataFrame from a flat sequence of raw input values.

    Args:
        arr: Sequence of exactly twelve values, ordered as ``date``,
            ``Store_number``, ``Family``, ``Item_onpromo``, ``Oil_prices``,
            ``Holiday_level``, ``Holiday_city``, ``TypeOfDay``,
            ``Store_city``, ``Store_state``, ``Store_type``, ``Cluster``.

    Returns:
        pandas.DataFrame: A single-row frame with those column names.
        ``Store_number``, ``Item_onpromo`` and ``Cluster`` are cast to int,
        ``Oil_prices`` to float, and ``date`` is parsed with
        ``pd.to_datetime``. The remaining columns keep the values as given.

    Raises:
        ValueError: If ``arr`` does not hold exactly twelve values, or a
            value cannot be converted to its target type.
    """
    columns = ['date', 'Store_number', 'Family', 'Item_onpromo', 'Oil_prices',
               'Holiday_level', 'Holiday_city', 'TypeOfDay', 'Store_city',
               'Store_state', 'Store_type', 'Cluster']
    int_columns = ['Store_number', 'Item_onpromo', 'Cluster']

    values = list(arr)
    if len(values) != len(columns):
        raise ValueError(
            f"expected {len(columns)} values ({', '.join(columns)}), "
            f"got {len(values)}"
        )

    # Build from a list of rows instead of np.array([arr]): NumPy coerces a
    # mixed str/number sequence to one string dtype, which silently turned
    # every numeric input (notably Oil_prices) into text.
    data = pd.DataFrame([values], columns=columns)
    data[int_columns] = data[int_columns].astype(int)
    data['Oil_prices'] = data['Oil_prices'].astype(float)
    data['date'] = pd.to_datetime(data['date'])

    return data
|
44 |
+
|
45 |
+
def process_data(data, categorical_pipeline, numerical_pipeliine, cat_cols, num_cols):
    """Derive date features and apply the supplied pipelines.

    Steps: index a copy of ``data`` by its ``date`` column, add the calendar
    columns via ``date_extracts``, add an ``Is_payday`` flag, then overwrite
    ``cat_cols`` and ``num_cols`` with the output of the respective
    pipeline's ``transform``.

    Args:
        data: DataFrame containing a ``date`` column. It is not modified;
            ``set_index`` returns a new frame.
        categorical_pipeline: Object with a ``transform`` method, applied to
            ``cat_cols``.
        numerical_pipeliine: Object with a ``transform`` method, applied to
            ``num_cols``. The spelling is kept because it is part of the
            existing keyword interface.
        cat_cols: Column labels passed to ``categorical_pipeline``.
        num_cols: Column labels passed to ``numerical_pipeliine``.

    Returns:
        pandas.DataFrame: The date-indexed, feature-enriched and transformed
        frame.
    """
    processed_data = data.set_index('date')
    date_extracts(processed_data)

    # Payday = the 15th or the last day of the month. Computed column-wise
    # rather than with a row-wise apply: same 0/1 result, no per-row Python
    # call, and well-defined when the frame has no rows.
    on_payday = (processed_data['DayOfMonth'] == 15) | (processed_data['Is_month_end'] == 1)
    processed_data['Is_payday'] = on_payday.astype('int64')

    processed_data[cat_cols] = categorical_pipeline.transform(processed_data[cat_cols])
    processed_data[num_cols] = numerical_pipeliine.transform(processed_data[num_cols])
    return processed_data
|
52 |
+
|