bright1 committed on
Commit
b36b4a2
1 Parent(s): 3d4b265

Added supplementary files

Browse files
Files changed (2) hide show
  1. src/__init__.py +0 -0
  2. src/utils.py +52 -0
src/__init__.py ADDED
File without changes
src/utils.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+
5
+
6
+
7
def payday(row):
    """Flag a row as a payday.

    A row counts as a payday when its ``DayOfMonth`` is 15 or its
    ``Is_month_end`` flag equals 1.

    Parameters
    ----------
    row : object
        Anything exposing ``DayOfMonth`` and ``Is_month_end`` attributes
        (for example a row passed by ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    int
        1 for a payday, 0 otherwise.
    """
    # Mid-month payday: decided without looking at the month-end flag.
    if row.DayOfMonth == 15:
        return 1
    return 1 if row.Is_month_end == 1 else 0
12
+
13
+
14
def date_extracts(data):
    """Add calendar feature columns derived from the frame's index, in place.

    Every feature is read from ``data.index`` through datetime accessors
    (``year``, ``month``, ``isocalendar()`` and so on), so the index must
    support them. Nothing is returned; ``data`` itself is modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose index provides the datetime accessors used below.
    """
    stamps = data.index
    weekday = stamps.dayofweek

    # Insertion order of this mapping is the order the columns are created in.
    features = {
        'Year': stamps.year,
        'Month': stamps.month,
        'DayOfMonth': stamps.day,
        'DaysInMonth': stamps.days_in_month,
        'DayOfYear': stamps.day_of_year,
        'DayOfWeek': weekday,
        'Week': stamps.isocalendar().week,
        # Weekday numbers above 4 are flagged as weekend days.
        'Is_weekend': np.where(weekday > 4, 1, 0),
        'Is_month_start': stamps.is_month_start.astype(int),
        'Is_month_end': stamps.is_month_end.astype(int),
        'Quarter': stamps.quarter,
        'Is_quarter_start': stamps.is_quarter_start.astype(int),
        'Is_quarter_end': stamps.is_quarter_end.astype(int),
        'Is_year_start': stamps.is_year_start.astype(int),
        'Is_year_end': stamps.is_year_end.astype(int),
    }
    for column, values in features.items():
        data[column] = values
30
+
31
+
32
+
33
+
34
def create_dataframe(arr):
    """Build a one-row DataFrame from a flat sequence of raw input values.

    Parameters
    ----------
    arr : sequence
        Twelve values, in this order: date, Store_number, Family,
        Item_onpromo, Oil_prices, Holiday_level, Holiday_city, TypeOfDay,
        Store_city, Store_state, Store_type, Cluster.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with those column names, where ``Store_number``,
        ``Item_onpromo`` and ``Cluster`` are integers, ``Oil_prices`` is a
        float and ``date`` is parsed with ``pd.to_datetime``. The remaining
        columns keep the values that were passed in.

    Raises
    ------
    ValueError
        If ``arr`` does not hold exactly twelve values, or a numeric field
        cannot be converted.
    """
    columns = ['date', 'Store_number', 'Family', 'Item_onpromo', 'Oil_prices',
               'Holiday_level', 'Holiday_city', 'TypeOfDay', 'Store_city',
               'Store_state', 'Store_type', 'Cluster']
    int_columns = ['Store_number', 'Item_onpromo', 'Cluster']

    # Build the frame straight from the Python values. Passing the row through
    # np.array first coerces a mixed string/number row to one string dtype,
    # which left Oil_prices as text and made float-valued integer fields
    # (e.g. 1.0 -> '1.0') fail the integer cast.
    data = pd.DataFrame([list(arr)], columns=columns)
    data = data.astype({column: int for column in int_columns})
    data['Oil_prices'] = pd.to_numeric(data['Oil_prices']).astype(float)
    data['date'] = pd.to_datetime(data['date'])

    return data
44
+
45
def process_data(data, categorical_pipeline, numerical_pipeliine, cat_cols, num_cols):
    """Index the frame by date, add date features, and apply both pipelines.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a ``date`` column; it is not modified, because
        ``set_index`` returns a new frame.
    categorical_pipeline : object
        Object whose ``transform`` is applied to the ``cat_cols`` columns.
    numerical_pipeliine : object
        Object whose ``transform`` is applied to the ``num_cols`` columns.
        The parameter name is spelled as in the original signature.
    cat_cols, num_cols : list
        Column labels handed to the respective pipeline.

    Returns
    -------
    pandas.DataFrame
        The date-indexed frame with the calendar columns, ``Is_payday``, and
        the pipeline outputs written back into ``cat_cols`` / ``num_cols``.
    """
    frame = data.set_index('date')
    date_extracts(frame)  # adds the calendar columns in place

    payday_inputs = frame[['DayOfMonth', 'Is_month_end']]
    frame['Is_payday'] = payday_inputs.apply(payday, axis=1)

    # Categorical output is written back before the numerical transform runs,
    # matching the original statement order.
    cat_values = categorical_pipeline.transform(frame[cat_cols])
    frame[cat_cols] = cat_values

    num_values = numerical_pipeliine.transform(frame[num_cols])
    frame[num_cols] = num_values

    return frame
52
+