Uledennis committed on
Commit
ee452b4
•
1 Parent(s): 8fe77cb

updated name

Browse files
Files changed (1) hide show
  1. app.py +415 -416
app.py CHANGED
@@ -1,417 +1,416 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import time
4
- from datetime import datetime
5
-
6
- import numpy as np
7
- import pmdarima as pm
8
- import matplotlib.pyplot as plt
9
- from pmdarima import auto_arima
10
- import plotly.graph_objects as go
11
-
12
- import torch
13
- from transformers import pipeline, TapasTokenizer, TapasForQuestionAnswering
14
-
15
- st.set_page_config(
16
- page_title="Sales Forecasting System",
17
- page_icon="πŸ“ˆ",
18
- layout="wide",
19
- initial_sidebar_state="expanded",
20
- )
21
-
22
- # Preprocessing
23
- @st.cache_data
24
- def merge(B, C, A):
25
- i = j = k = 0
26
-
27
- # Convert 'Date' columns to datetime.date objects
28
- B['Date'] = pd.to_datetime(B['Date']).dt.date
29
- C['Date'] = pd.to_datetime(C['Date']).dt.date
30
- A['Date'] = pd.to_datetime(A['Date']).dt.date
31
-
32
- while i < len(B) and j < len(C):
33
- if B['Date'].iloc[i] <= C['Date'].iloc[j]:
34
- A['Date'].iloc[k] = B['Date'].iloc[i]
35
- A['Sales'].iloc[k] = B['Sales'].iloc[i]
36
- i += 1
37
-
38
- else:
39
- A['Date'].iloc[k] = C['Date'].iloc[j]
40
- A['Sales'].iloc[k] = C['Sales'].iloc[j]
41
- j += 1
42
- k += 1
43
-
44
- while i < len(B):
45
- A['Date'].iloc[k] = B['Date'].iloc[i]
46
- A['Sales'].iloc[k] = B['Sales'].iloc[i]
47
- i += 1
48
- k += 1
49
-
50
- while j < len(C):
51
- A['Date'].iloc[k] = C['Date'].iloc[j]
52
- A['Sales'].iloc[k] = C['Sales'].iloc[j]
53
- j += 1
54
- k += 1
55
-
56
- return A
57
-
58
- @st.cache_data
59
- def merge_sort(dataframe):
60
- if len(dataframe) > 1:
61
- center = len(dataframe) // 2
62
- left = dataframe.iloc[:center]
63
- right = dataframe.iloc[center:]
64
- merge_sort(left)
65
- merge_sort(right)
66
-
67
- return merge(left, right, dataframe)
68
-
69
- else:
70
- return dataframe
71
-
72
- @st.cache_data
73
- def drop (dataframe):
74
- def get_columns_containing(dataframe, substrings):
75
- return [col for col in dataframe.columns if any(substring.lower() in col.lower() for substring in substrings)]
76
-
77
- columns_to_keep = get_columns_containing(dataframe, ["date", "sale"])
78
- dataframe = dataframe.drop(columns=dataframe.columns.difference(columns_to_keep))
79
- dataframe = dataframe.dropna()
80
-
81
- return dataframe
82
-
83
- @st.cache_data
84
- def date_format(dataframe):
85
- for i, d, s in dataframe.itertuples():
86
- dataframe['Date'][i] = dataframe['Date'][i].strip()
87
-
88
- for i, d, s in dataframe.itertuples():
89
- new_date = datetime.strptime(dataframe['Date'][i], "%m/%d/%Y").date()
90
- dataframe['Date'][i] = new_date
91
-
92
- return dataframe
93
-
94
- @st.cache_data
95
- def group_to_three(dataframe):
96
- dataframe['Date'] = pd.to_datetime(dataframe['Date'])
97
- dataframe = dataframe.groupby([pd.Grouper(key='Date', freq='3D')])['Sales'].mean().round(2)
98
- dataframe = dataframe.replace(0, np.nan).dropna()
99
-
100
- return dataframe
101
-
102
- @st.cache_data
103
- def series_to_df_exogenous(series):
104
- dataframe = series.to_frame()
105
- dataframe = dataframe.reset_index()
106
- dataframe = dataframe.set_index('Date')
107
- dataframe = dataframe.dropna()
108
- # Create the eXogenous values
109
- dataframe['Sales First Difference'] = dataframe['Sales'] - dataframe['Sales'].shift(1)
110
- dataframe['Seasonal First Difference'] = dataframe['Sales'] - dataframe['Sales'].shift(12)
111
- dataframe = dataframe.dropna()
112
- return dataframe
113
-
114
- @st.cache_data
115
- def dates_df(dataframe):
116
- dataframe = dataframe.reset_index()
117
- dataframe['Date'] = dataframe['Date'].dt.strftime('%B %d, %Y')
118
- dataframe[dataframe.columns] = dataframe[dataframe.columns].astype(str)
119
- return dataframe
120
-
121
- @st.cache_data
122
- def get_forecast_period(period):
123
- return round(period / 3)
124
-
125
- # SARIMAX Model
126
- @st.cache_data
127
- def train_test(dataframe, n):
128
- training_y = dataframe.iloc[:-n,0]
129
- test_y = dataframe.iloc[-n:,0]
130
- test_y_series = pd.Series(test_y, index=dataframe.iloc[-n:, 0].index)
131
- training_X = dataframe.iloc[:-n,1:]
132
- test_X = dataframe.iloc[-n:,1:]
133
- future_X = dataframe.iloc[0:,1:]
134
- return (training_y, test_y, test_y_series, training_X, test_X, future_X)
135
-
136
- @st.cache_data
137
- def test_fitting(dataframe, Exo, trainY):
138
- trainTestModel = auto_arima(X = Exo, y = trainY, start_p=1, start_q=1,
139
- test='adf',min_p=1,min_q=1,
140
- max_p=3, max_q=3, m=12,
141
- start_P=2, start_Q=2, seasonal=True,
142
- d=None, D=1, trace=True,
143
- error_action='ignore',
144
- suppress_warnings=True,
145
- stepwise=True, maxiter = 50)
146
- model = trainTestModel
147
- return model
148
-
149
- def forecast_accuracy(forecast, actual):
150
- mape = np.mean(np.abs(forecast - actual)/np.abs(actual)).round(4) # MAPE
151
- rmse = (np.mean((forecast - actual)**2)**.5).round(2) # RMSE
152
- corr = np.corrcoef(forecast, actual)[0,1] # corr
153
- mins = np.amin(np.hstack([forecast[:,None],
154
- actual[:,None]]), axis=1)
155
- maxs = np.amax(np.hstack([forecast[:,None],
156
- actual[:,None]]), axis=1)
157
- minmax = 1 - np.mean(mins/maxs) # minmax
158
- return({'mape':mape, 'rmse':rmse, 'corr':corr, 'min-max':minmax})
159
-
160
- @st.cache_data
161
- def sales_growth(dataframe, fittedValues):
162
- sales_growth = fittedValues.to_frame()
163
- sales_growth = sales_growth.reset_index()
164
- sales_growth.columns = ("Date", "Sales")
165
- sales_growth = sales_growth.set_index('Date')
166
-
167
- sales_growth['Sales'] = (sales_growth['Sales']).round(2)
168
-
169
- # Calculate and create the column for sales difference and growth
170
- sales_growth['Forecasted Sales First Difference']=(sales_growth['Sales']-sales_growth['Sales'].shift(1)).round(2)
171
- sales_growth['Forecasted Sales Growth']=(((sales_growth['Sales']-sales_growth['Sales'].shift(1))/sales_growth['Sales'].shift(1))*100).round(2)
172
-
173
- # Calculate and create the first row for sales difference and growth
174
- sales_growth['Forecasted Sales First Difference'].iloc[0] = (dataframe['Sales'].iloc[-1]-dataframe['Sales'].iloc[-2]).round(2)
175
- sales_growth['Forecasted Sales Growth'].iloc[0]=(((dataframe['Sales'].iloc[-1]-dataframe['Sales'].iloc[-2])/dataframe['Sales'].iloc[-1])*100).round(2)
176
-
177
- return sales_growth
178
-
179
- @st.cache_data
180
- def merge_forecast_data(actual, predicted, future): # debug
181
- actual = actual.to_frame()
182
- print("BEFORE RENAME ACTUAL")
183
- print(actual)
184
- actual.rename(columns={actual.columns[0]: "Actual Sales"}, inplace=True)
185
- print("ACTUAL")
186
- print(actual)
187
-
188
- predicted = predicted.to_frame()
189
- predicted.rename(columns={predicted.columns[0]: "Predicted Sales"}, inplace=True)
190
- print("PREDICTED")
191
- print(predicted)
192
-
193
- future = future.to_frame()
194
- future = future.rename_axis('Date')
195
- future.rename(columns={future.columns[0]: "Forecasted Future Sales"}, inplace=True)
196
- print("FUTURE")
197
- print(future)
198
-
199
- merged_dataframe = pd.concat([actual, predicted, future], axis=1)
200
- print("MERGED DATAFRAME")
201
- print(merged_dataframe)
202
- merged_dataframe = merged_dataframe.reset_index()
203
- print("MERGED DATAFRAME RESET INDEX")
204
- print(merged_dataframe)
205
- return merged_dataframe
206
-
207
- def interpret_mape(mape_score):
208
- score = (mape_score * 100).round(2)
209
- if score < 10:
210
- interpretation = "Great"
211
- color = "green"
212
- elif score < 20:
213
- interpretation = "Good"
214
- color = "seagreen"
215
- elif score < 50:
216
- interpretation = "Relatively good"
217
- color = "orange"
218
- else:
219
- interpretation = "Poor"
220
- color = "red"
221
- return score, interpretation, color
222
-
223
- # TAPAS Model
224
-
225
- @st.cache_resource
226
- def load_tapas_model():
227
- model_name = "google/tapas-large-finetuned-wtq"
228
- tokenizer = TapasTokenizer.from_pretrained(model_name)
229
- model = TapasForQuestionAnswering.from_pretrained(model_name, local_files_only=False)
230
- pipe = pipeline("table-question-answering", model=model, tokenizer=tokenizer)
231
- return pipe
232
-
233
- pipe = load_tapas_model()
234
-
235
- def get_answer(table, query):
236
- answers = pipe(table=table, query=query)
237
- return answers
238
-
239
- def convert_answer(answer):
240
- if answer['aggregator'] == 'SUM':
241
- cells = answer['cells']
242
- converted = sum(float(value.replace(',', '')) for value in cells)
243
- return converted
244
-
245
- if answer['aggregator'] == 'AVERAGE':
246
- cells = answer['cells']
247
- values = [float(value.replace(',', '')) for value in cells]
248
- converted = sum(values) / len(values)
249
- return converted
250
-
251
- if answer['aggregator'] == 'COUNT':
252
- cells = answer['cells']
253
- converted = sum(int(value.replace(',', '')) for value in cells)
254
- return converted
255
-
256
- else:
257
-
258
- return answer['answer']
259
-
260
- def get_converted_answer(table, query):
261
- converted_answer = convert_answer(get_answer(table, query))
262
- return converted_answer
263
-
264
- # Session States
265
- if 'uploaded' not in st.session_state:
266
- st.session_state.uploaded = False
267
-
268
- if 'forecasted' not in st.session_state:
269
- st.session_state.forecasted = False
270
-
271
- # Web Application
272
- st.title("Forecasting Dashboard πŸ“ˆ")
273
- if not st.session_state.uploaded:
274
- st.subheader("Welcome User, get started forecasting by uploading your file in the sidebar!")
275
-
276
- # Sidebar Menu
277
- with st.sidebar:
278
- # TODO Name for product
279
- st.title("MLCast v1.1")
280
- st.subheader("An intelligent sales forecasting system")
281
- uploaded_file = st.file_uploader("Upload your store data here to proceed (must atleast contain Date and Sales)", type=["csv"])
282
- if uploaded_file is not None:
283
- date_found = False
284
- sales_found = False
285
- df = pd.read_csv(uploaded_file, parse_dates=True)
286
- for column in df.columns:
287
- if 'Date' in column:
288
- date_found = True
289
- if 'Sales' in column:
290
- sales_found = True
291
- if(date_found == False or sales_found == False):
292
- st.error('Please upload a csv containing both Date and Sales...')
293
- st.stop()
294
-
295
- st.success("File uploaded successfully!")
296
- st.write("Your uploaded data:")
297
- st.write(df)
298
-
299
- df = drop(df)
300
- df = date_format(df)
301
- merge_sort(df)
302
- series = group_to_three(df)
303
-
304
- st.session_state.uploaded = True
305
-
306
- with open('sample.csv', 'rb') as f:
307
- st.download_button("Download our sample CSV", f, file_name='sample.csv')
308
-
309
- if (st.session_state.uploaded):
310
- st.subheader("Sales History")
311
- st.line_chart(series)
312
-
313
- MIN_DAYS = 30
314
- MAX_DAYS = 90
315
- period = st.slider('How many days would you like to forecast?', min_value=MIN_DAYS, max_value=MAX_DAYS)
316
- forecast_period = get_forecast_period(period)
317
-
318
- forecast_button = st.button(
319
- 'Start Forecasting',
320
- key='forecast_button',
321
- type="primary",
322
- )
323
-
324
- if (forecast_button or st.session_state.forecasted):
325
- df = series_to_df_exogenous(series)
326
- n_periods = round(len(df) * 0.2)
327
- print(n_periods) # debug
328
-
329
- train = train_test(df, n_periods)
330
- training_y, test_y, test_y_series, training_X, test_X, future_X = train
331
- train_test_model = test_fitting(df, training_X, training_y)
332
-
333
- print(df) # debug
334
- print(len(df)) # debug
335
-
336
- future_n_periods = forecast_period
337
- fitted, confint = train_test_model.predict(X=test_X, n_periods=n_periods, return_conf_int=True)
338
- index_of_fc = test_y_series.index
339
-
340
- # make series for plotting purpose
341
- fitted_series = pd.Series(fitted)
342
- fitted_series.index = index_of_fc
343
- lower_series = pd.Series(confint[:, 0], index=index_of_fc)
344
- upper_series = pd.Series(confint[:, 1], index=index_of_fc)
345
-
346
- #Future predictions
347
- frequency = '3D'
348
- future_fitted, confint = train_test_model.predict(X=df.iloc[-future_n_periods:,1:], n_periods=future_n_periods, return_conf_int=True, freq=frequency)
349
- future_index_of_fc = pd.date_range(df['Sales'].index[-1], periods = future_n_periods, freq=frequency)
350
-
351
- # make series for future plotting purpose
352
- future_fitted_series = pd.Series(future_fitted)
353
- future_fitted_series.index = future_index_of_fc
354
- # future_lower_series = pd.Series(confint[:, 0], index=future_index_of_fc)
355
- # future_upper_series = pd.Series(confint[:, 1], index=future_index_of_fc)
356
-
357
- future_sales_growth = sales_growth(df, future_fitted_series)
358
-
359
- test_y, predictions = np.array(test_y), np.array(fitted)
360
- print("Test Y:", test_y) # debug
361
- print("Prediction:", fitted) # debug
362
- score = forecast_accuracy(predictions, test_y)
363
- print("Score:", score) # debug
364
- mape, interpretation, mape_color = interpret_mape(score['mape'])
365
-
366
- print(df)
367
- print(df['Sales'])
368
- merged_data = merge_forecast_data(df['Sales'], fitted_series, future_fitted_series)
369
-
370
- col_charts = st.columns(2)
371
-
372
- print(merged_data) # debug
373
- print(merged_data.info)
374
- print(merged_data.dtypes)
375
- with col_charts[0]:
376
- fig_compare = go.Figure()
377
- fig_compare.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['Actual Sales'], mode='lines', name='Actual Sales'))
378
- fig_compare.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['Predicted Sales'], mode='lines', name='Predicted Sales', line=dict(color='#006400')))
379
- fig_compare.update_layout(title='Historical Sales Data', xaxis_title='Date', yaxis_title='Sales')
380
- st.plotly_chart(fig_compare, use_container_width=True)
381
-
382
- with col_charts[1]:
383
- fig_forecast = go.Figure()
384
- fig_forecast.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['Actual Sales'], mode='lines', name='Actual Sales'))
385
- fig_forecast.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['Forecasted Future Sales'], mode='lines', name='Future Forecasted Sales', line=dict(color=mape_color)))
386
- fig_forecast.update_layout(title='Forecasted Sales Data', xaxis_title='Date', yaxis_title='Sales')
387
- st.plotly_chart(fig_forecast, use_container_width=True)
388
- st.write(f"MAPE score: {mape}% - {interpretation}")
389
-
390
- df = dates_df(future_sales_growth)
391
-
392
- col_table = st.columns(2)
393
- with col_table[0]:
394
- col_table[0].subheader(f"Forecasted sales in the next {period} days")
395
- col_table[0].write(df)
396
-
397
- with col_table[1]:
398
- col_table[1] = st.subheader("Question-Answering")
399
- with st.form("question_form"):
400
- question = st.text_input('Ask a Question about the Forecasted Data', placeholder="What is the total sales in the month of December?")
401
- query_button = st.form_submit_button(label='Generate Answer')
402
- if query_button or question:
403
- answer = get_converted_answer(df, question)
404
- if answer is not None:
405
- st.write("The answer is:", answer)
406
- else:
407
- st.write("Answer is not found in table")
408
- st.session_state.forecasted = True
409
-
410
-
411
- # Hide Streamlit default style
412
- hide_st_style = """
413
- <style>
414
- footer {visibility: hidden;}
415
- </style>
416
- """
417
  st.markdown(hide_st_style, unsafe_allow_html=True)
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import time
4
+ from datetime import datetime
5
+
6
+ import numpy as np
7
+ import pmdarima as pm
8
+ import matplotlib.pyplot as plt
9
+ from pmdarima import auto_arima
10
+ import plotly.graph_objects as go
11
+
12
+ import torch
13
+ from transformers import pipeline, TapasTokenizer, TapasForQuestionAnswering
14
+
15
# Configure the browser tab title/icon and the page layout. The icon is the
# chart emoji; the mis-encoded byte sequence is restored to a single character.
st.set_page_config(
    page_title="Sales Predictor-AI Project",
    page_icon="📈",
    layout="wide",
    initial_sidebar_state="expanded",
)
21
+
22
+ # Preprocessing
23
+ @st.cache_data
24
def merge(B, C, A):
    """Merge two date-ordered frames into ``A`` in place (merge-sort merge step).

    Args:
        B: Left run, already ordered by ``'Date'``; needs ``'Date'`` and
            ``'Sales'`` columns.
        C: Right run, same requirements as ``B``.
        A: Destination frame with ``len(B) + len(C)`` rows. Its ``'Date'`` and
            ``'Sales'`` columns are overwritten; its index and any other
            columns are left untouched.

    Returns:
        ``A`` itself, with ``'Date'`` holding ``datetime.date`` objects.

    Raises:
        ValueError: If ``A`` does not have ``len(B) + len(C)`` rows.
    """
    # Snapshot both runs before touching A: merge_sort passes slices of A as
    # B and C, so writing into A while still reading from them could clobber
    # rows that have not been merged yet.
    left_dates = list(pd.to_datetime(B['Date']).dt.date)
    right_dates = list(pd.to_datetime(C['Date']).dt.date)
    left_sales = list(B['Sales'])
    right_sales = list(C['Sales'])

    merged_dates = []
    merged_sales = []
    i = j = 0
    while i < len(left_dates) and j < len(right_dates):
        # "<=" takes from the left run on ties, which keeps the sort stable.
        if left_dates[i] <= right_dates[j]:
            merged_dates.append(left_dates[i])
            merged_sales.append(left_sales[i])
            i += 1
        else:
            merged_dates.append(right_dates[j])
            merged_sales.append(right_sales[j])
            j += 1

    # At most one of the runs still has rows; append the leftovers.
    merged_dates.extend(left_dates[i:])
    merged_sales.extend(left_sales[i:])
    merged_dates.extend(right_dates[j:])
    merged_sales.extend(right_sales[j:])

    # Whole-column assignment instead of chained ``A[col].iloc[k] = ...``,
    # which pandas does not guarantee to write through to ``A``.
    A['Date'] = merged_dates
    A['Sales'] = merged_sales
    return A
57
+
58
+ @st.cache_data
59
def merge_sort(dataframe):
    """Sort ``dataframe`` by ``'Date'`` with a top-down merge sort.

    Args:
        dataframe: Frame with ``'Date'`` and ``'Sales'`` columns.

    Returns:
        The frame returned by ``merge`` for the two sorted halves, or
        ``dataframe`` unchanged when it has at most one row.
    """
    if len(dataframe) <= 1:
        return dataframe

    center = len(dataframe) // 2
    # Sort independent copies and use the *returned* halves: the previous
    # version discarded the recursive results and relied on the slices being
    # mutated in place, which pandas does not guarantee and which cannot
    # happen when a cached result is returned without running the body.
    left = merge_sort(dataframe.iloc[:center].copy())
    right = merge_sort(dataframe.iloc[center:].copy())
    return merge(left, right, dataframe)
71
+
72
+ @st.cache_data
73
def drop(dataframe):
    """Keep only date/sale columns and discard incomplete rows.

    A column is kept when its name contains ``"date"`` or ``"sale"``
    (case-insensitive). Rows with a missing value in any kept column are
    removed.

    Returns:
        A new DataFrame with the kept columns in their original order.
    """
    wanted = ("date", "sale")
    kept = [
        name
        for name in dataframe.columns
        if any(key in name.lower() for key in wanted)
    ]
    discarded = dataframe.columns.difference(kept)
    return dataframe.drop(columns=discarded).dropna()
82
+
83
+ @st.cache_data
84
def date_format(dataframe):
    """Parse the ``'Date'`` column from ``MM/DD/YYYY`` text into dates.

    Surrounding whitespace is stripped from each value before parsing. The
    column is replaced on the passed frame, which is also returned.

    Args:
        dataframe: Frame whose ``'Date'`` column holds strings.

    Returns:
        ``dataframe`` with ``'Date'`` holding ``datetime.date`` objects.

    Raises:
        ValueError: If a value does not match ``%m/%d/%Y``.
    """
    # One whole-column assignment replaces the per-row chained writes
    # (``dataframe['Date'][i] = ...``), which are not guaranteed to write
    # through, and removes the requirement of exactly two columns that the
    # old ``for i, d, s in itertuples()`` unpacking imposed.
    dataframe['Date'] = [
        datetime.strptime(raw.strip(), "%m/%d/%Y").date()
        for raw in dataframe['Date']
    ]
    return dataframe
93
+
94
+ @st.cache_data
95
def group_to_three(dataframe):
    """Average ``'Sales'`` over 3-day bins of the ``'Date'`` column.

    The ``'Date'`` column of the passed frame is converted to pandas
    datetimes in place. Bins whose rounded mean is 0 or missing are dropped.

    Returns:
        A Series of mean sales, rounded to 2 decimals, indexed by bin date.
    """
    dataframe['Date'] = pd.to_datetime(dataframe['Date'])
    three_day_bins = pd.Grouper(key='Date', freq='3D')
    window_means = dataframe.groupby([three_day_bins])['Sales'].mean()
    window_means = window_means.round(2)
    return window_means.replace(0, np.nan).dropna()
101
+
102
+ @st.cache_data
103
def series_to_df_exogenous(series):
    """Turn the sales series into a frame with two differenced feature columns.

    Args:
        series: Sales series named ``'Sales'`` with an index named ``'Date'``.

    Returns:
        A DataFrame indexed by ``'Date'`` with ``'Sales'``,
        ``'Sales First Difference'`` (lag 1) and
        ``'Seasonal First Difference'`` (lag 12). Rows without both lags
        available are dropped.
    """
    frame = series.to_frame().reset_index().set_index('Date').dropna()

    # NOTE(review): both features are derived from the same-period 'Sales'
    # value they are later used to predict -- confirm this is intended.
    sales = frame['Sales']
    frame['Sales First Difference'] = sales - sales.shift(1)
    frame['Seasonal First Difference'] = sales - sales.shift(12)
    return frame.dropna()
113
+
114
+ @st.cache_data
115
def dates_df(dataframe):
    """Return a display copy of a date-indexed frame as text.

    The index is moved into a column, ``'Date'`` is formatted like
    ``"January 01, 2023"``, and every column is cast to ``str``.
    """
    table = dataframe.reset_index()
    table['Date'] = table['Date'].dt.strftime('%B %d, %Y')
    return table.astype(str)
120
+
121
+ @st.cache_data
122
def get_forecast_period(period):
    """Convert a horizon in days to a number of 3-day steps, rounded."""
    steps = period / 3
    return round(steps)
124
+
125
+ # SARIMAX Model
126
+ @st.cache_data
127
def train_test(dataframe, n):
    """Split a frame into train/test parts, holding out the last ``n`` rows.

    The first column is the target; all remaining columns are exogenous
    features.

    Returns:
        ``(training_y, test_y, test_y_series, training_X, test_X, future_X)``
        where ``future_X`` is the exogenous part of every row.
    """
    target = dataframe.iloc[:, 0]
    exogenous = dataframe.iloc[:, 1:]

    training_y = target.iloc[:-n]
    test_y = target.iloc[-n:]
    test_y_series = pd.Series(test_y, index=test_y.index)

    training_X = exogenous.iloc[:-n]
    test_X = exogenous.iloc[-n:]
    future_X = exogenous.iloc[0:]
    return (training_y, test_y, test_y_series, training_X, test_X, future_X)
135
+
136
+ @st.cache_data
137
def test_fitting(dataframe, Exo, trainY):
    """Fit a seasonal model to the training data with ``auto_arima``.

    Args:
        dataframe: Not used by the body.
        Exo: Exogenous features aligned with ``trainY``.
        trainY: Target values to fit.

    Returns:
        The model object returned by ``auto_arima``.
    """
    search_options = dict(
        start_p=1, start_q=1,
        test='adf', min_p=1, min_q=1,
        max_p=3, max_q=3, m=12,
        start_P=2, start_Q=2, seasonal=True,
        d=None, D=1, trace=True,
        error_action='ignore',
        suppress_warnings=True,
        stepwise=True, maxiter=50,
    )
    return auto_arima(X=Exo, y=trainY, **search_options)
148
+
149
def forecast_accuracy(forecast, actual):
    """Compute accuracy metrics for a forecast against the actual values.

    Args:
        forecast: 1-D array-like of predicted values (list, ndarray, Series).
        actual: 1-D array-like of observed values, same length.

    Returns:
        dict with keys ``'mape'`` (fraction, rounded to 4 decimals),
        ``'rmse'`` (rounded to 2 decimals), ``'corr'`` (Pearson correlation)
        and ``'min-max'`` (1 minus the mean of element-wise min/max).
    """
    # Coerce up front so lists and Series work too; the previous
    # ``x[:, None]`` indexing only worked on ndarrays.
    forecast = np.asarray(forecast, dtype=float)
    actual = np.asarray(actual, dtype=float)

    errors = forecast - actual
    mape = np.mean(np.abs(errors) / np.abs(actual)).round(4)  # MAPE
    rmse = (np.mean(errors ** 2) ** .5).round(2)              # RMSE
    corr = np.corrcoef(forecast, actual)[0, 1]                # corr

    # Element-wise min/max of the two series.
    mins = np.minimum(forecast, actual)
    maxs = np.maximum(forecast, actual)
    minmax = 1 - np.mean(mins / maxs)                         # minmax
    return {'mape': mape, 'rmse': rmse, 'corr': corr, 'min-max': minmax}
159
+
160
+ @st.cache_data
161
def sales_growth(dataframe, fittedValues):
    """Build a table of forecasted sales with period-over-period change.

    Args:
        dataframe: Historical frame with a ``'Sales'`` column of at least
            two rows.
        fittedValues: Series of forecasted sales indexed by date.

    Returns:
        A DataFrame indexed by ``'Date'`` with ``'Sales'`` (rounded to 2
        decimals), ``'Forecasted Sales First Difference'`` and
        ``'Forecasted Sales Growth'`` (percent). The first row has no
        forecasted predecessor, so it is filled from the last two historical
        sales values.
    """
    growth = fittedValues.to_frame().reset_index()
    growth.columns = ("Date", "Sales")
    growth = growth.set_index('Date')
    growth['Sales'] = growth['Sales'].round(2)

    previous = growth['Sales'].shift(1)
    change = growth['Sales'] - previous
    growth['Forecasted Sales First Difference'] = change.round(2)
    growth['Forecasted Sales Growth'] = ((change / previous) * 100).round(2)

    last_actual = dataframe['Sales'].iloc[-1]
    prior_actual = dataframe['Sales'].iloc[-2]
    actual_change = last_actual - prior_actual

    # Positional ``.iloc[row, col]`` writes replace the chained
    # ``growth[col].iloc[0] = ...`` form, which is not guaranteed to write
    # through to the frame.
    diff_col = growth.columns.get_loc('Forecasted Sales First Difference')
    growth_col = growth.columns.get_loc('Forecasted Sales Growth')
    growth.iloc[0, diff_col] = round(float(actual_change), 2)
    # Growth is relative to the *earlier* value, matching the formula used
    # for every other row; the old code divided by the later one.
    growth.iloc[0, growth_col] = round(float(actual_change / prior_actual * 100), 2)

    return growth
178
+
179
+ @st.cache_data
180
def merge_forecast_data(actual, predicted, future):
    """Combine actual, predicted and future sales into one plotting table.

    Args:
        actual: Series of observed sales.
        predicted: Series of model predictions.
        future: Series of forecasted future sales.

    Returns:
        A DataFrame whose first column is the former (outer-joined) index,
        followed by ``'Actual Sales'``, ``'Predicted Sales'`` and
        ``'Forecasted Future Sales'``. Dates missing from a series are NaN
        in that series' column.
    """
    # The debug prints that dumped every intermediate frame to stdout on
    # each call have been removed.
    actual = actual.to_frame(name="Actual Sales")
    predicted = predicted.to_frame(name="Predicted Sales")
    # Give the forecast index the 'Date' name before joining.
    future = future.to_frame(name="Forecasted Future Sales").rename_axis('Date')

    merged_dataframe = pd.concat([actual, predicted, future], axis=1)
    return merged_dataframe.reset_index()
206
+
207
def interpret_mape(mape_score):
    """Turn a MAPE fraction into a percentage, a label and a display color.

    Args:
        mape_score: MAPE as a fraction (``0.075`` means 7.5%). Plain Python
            floats and NumPy floats are both accepted.

    Returns:
        ``(score, interpretation, color)`` where ``score`` is the percentage
        rounded to 2 decimals.
    """
    # np.round works on plain floats too; the ``.round`` method used
    # before only exists on NumPy scalars.
    score = np.round(mape_score * 100, 2)

    # (exclusive upper bound in percent, label, color), checked in order.
    bands = (
        (10, "Great", "green"),
        (20, "Good", "seagreen"),
        (50, "Relatively good", "orange"),
    )
    for upper_bound, interpretation, color in bands:
        if score < upper_bound:
            return score, interpretation, color
    return score, "Poor", "red"
222
+
223
+ # TAPAS Model
224
+
225
+ @st.cache_resource
226
def load_tapas_model():
    """Build the table-question-answering pipeline from the TAPAS checkpoint.

    Returns:
        A ``transformers`` pipeline wrapping the
        ``google/tapas-large-finetuned-wtq`` model and its tokenizer.
    """
    checkpoint = "google/tapas-large-finetuned-wtq"
    tapas_tokenizer = TapasTokenizer.from_pretrained(checkpoint)
    tapas_model = TapasForQuestionAnswering.from_pretrained(checkpoint, local_files_only=False)
    return pipeline("table-question-answering", model=tapas_model, tokenizer=tapas_tokenizer)
232
+
233
# Module-level question-answering pipeline used by get_answer().
pipe = load_tapas_model()
234
+
235
def get_answer(table, query):
    """Run the module-level ``pipe`` on ``table`` for ``query`` and return its result."""
    return pipe(table=table, query=query)
238
+
239
def convert_answer(answer):
    """Reduce a table-QA pipeline answer to a single value.

    Args:
        answer: Mapping with keys ``'aggregator'``, ``'cells'`` (list of cell
            strings) and ``'answer'`` (the pipeline's textual answer).

    Returns:
        * ``SUM``: the sum of the selected cells as a number.
        * ``AVERAGE``: their mean, or ``None`` when no cell was selected.
        * ``COUNT``: the number of selected cells.
        * any other aggregator, or cells that are not numeric: the raw
          ``answer['answer']`` text.
    """
    aggregator = answer['aggregator']
    if aggregator not in ('SUM', 'AVERAGE', 'COUNT'):
        return answer['answer']

    cells = answer['cells']
    if aggregator == 'COUNT':
        # COUNT asks how many cells were selected; the old code summed
        # ``int(cell)``, which is a different quantity and raised on any
        # non-integer cell such as "12.5".
        return len(cells)

    try:
        # Cells are text; strip thousands separators before parsing.
        values = [float(cell.replace(',', '')) for cell in cells]
    except ValueError:
        # Non-numeric selection (e.g. date cells): fall back to the text
        # answer instead of crashing the page.
        return answer['answer']

    if aggregator == 'SUM':
        return sum(values)

    # AVERAGE: avoid ZeroDivisionError on an empty selection.
    if not values:
        return None
    return sum(values) / len(values)
259
+
260
def get_converted_answer(table, query):
    """Ask ``query`` of ``table`` and return the answer reduced by ``convert_answer``."""
    raw_answer = get_answer(table, query)
    return convert_answer(raw_answer)
263
+
264
+ # Session States
265
# Flags that survive Streamlit reruns: whether a valid CSV has been uploaded
# and whether a forecast has already been requested.
if 'uploaded' not in st.session_state:
    st.session_state.uploaded = False

if 'forecasted' not in st.session_state:
    st.session_state.forecasted = False

# Web Application
# The title emoji is restored from its mis-encoded byte sequence.
st.title("Forecasting Dashboard 📈")
if not st.session_state.uploaded:
    st.subheader("Welcome User, get started forecasting by uploading your file in the sidebar!")
275
+
276
+ # Sidebar Menu
277
with st.sidebar:
    st.title("Forecaster v1.1")
    st.subheader("An intelligent sales forecasting system")
    uploaded_file = st.file_uploader("Upload your store data here to proceed (must atleast contain Date and Sales)", type=["csv"])
    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file, parse_dates=True)

        # The preprocessing helpers look up the columns by the exact names
        # 'Date' and 'Sales', so validate exactly that. The old substring
        # check accepted e.g. "Order Date" and then failed later with a
        # KeyError.
        if 'Date' not in df.columns or 'Sales' not in df.columns:
            st.error('Please upload a csv containing both Date and Sales...')
            st.stop()

        st.success("File uploaded successfully!")
        st.write("Your uploaded data:")
        st.write(df)

        df = drop(df)
        df = date_format(df)
        # Use the returned frame rather than relying on in-place mutation,
        # which does not happen when a cached result is returned.
        df = merge_sort(df)
        series = group_to_three(df)

        st.session_state.uploaded = True
    else:
        # No file this run (e.g. the user removed it): `series` is not
        # defined, so the main page must not try to render it.
        st.session_state.uploaded = False
        st.session_state.forecasted = False

    with open('sample.csv', 'rb') as f:
        st.download_button("Download our sample CSV", f, file_name='sample.csv')
307
+
308
# Main page: sales history, forecast controls, charts, table and Q&A.
# Debug prints (including ``print(merged_data.info)``, which printed a bound
# method rather than calling it) and unused locals have been removed.
if st.session_state.uploaded:
    st.subheader("Sales History")
    st.line_chart(series)

    MIN_DAYS = 30
    MAX_DAYS = 90
    period = st.slider('How many days would you like to forecast?', min_value=MIN_DAYS, max_value=MAX_DAYS)
    # The series is binned in 3-day steps, so convert days to steps.
    forecast_period = get_forecast_period(period)

    forecast_button = st.button(
        'Start Forecasting',
        key='forecast_button',
        type="primary",
    )

    # `forecasted` keeps the results on screen across reruns (e.g. when the
    # question form is submitted) without pressing the button again.
    if forecast_button or st.session_state.forecasted:
        df = series_to_df_exogenous(series)
        # Hold out the most recent 20% of rows for back-testing.
        n_periods = round(len(df) * 0.2)

        training_y, test_y, test_y_series, training_X, test_X, future_X = train_test(df, n_periods)
        train_test_model = test_fitting(df, training_X, training_y)

        future_n_periods = forecast_period
        fitted, confint = train_test_model.predict(X=test_X, n_periods=n_periods, return_conf_int=True)
        index_of_fc = test_y_series.index

        # make series for plotting purpose
        fitted_series = pd.Series(fitted)
        fitted_series.index = index_of_fc

        # Future predictions
        # NOTE(review): the "future" exogenous rows are the last historical
        # rows, and the forecast index starts at the last observed date
        # rather than one step after it -- confirm both are intended.
        frequency = '3D'
        future_fitted, confint = train_test_model.predict(X=df.iloc[-future_n_periods:,1:], n_periods=future_n_periods, return_conf_int=True, freq=frequency)
        future_index_of_fc = pd.date_range(df['Sales'].index[-1], periods = future_n_periods, freq=frequency)

        # make series for future plotting purpose
        future_fitted_series = pd.Series(future_fitted)
        future_fitted_series.index = future_index_of_fc

        future_sales_growth = sales_growth(df, future_fitted_series)

        test_y, predictions = np.array(test_y), np.array(fitted)
        score = forecast_accuracy(predictions, test_y)
        mape, interpretation, mape_color = interpret_mape(score['mape'])

        merged_data = merge_forecast_data(df['Sales'], fitted_series, future_fitted_series)

        col_charts = st.columns(2)

        with col_charts[0]:
            fig_compare = go.Figure()
            fig_compare.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['Actual Sales'], mode='lines', name='Actual Sales'))
            fig_compare.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['Predicted Sales'], mode='lines', name='Predicted Sales', line=dict(color='#006400')))
            fig_compare.update_layout(title='Historical Sales Data', xaxis_title='Date', yaxis_title='Sales')
            st.plotly_chart(fig_compare, use_container_width=True)

        with col_charts[1]:
            fig_forecast = go.Figure()
            fig_forecast.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['Actual Sales'], mode='lines', name='Actual Sales'))
            # The forecast line is colored by the back-test MAPE band.
            fig_forecast.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['Forecasted Future Sales'], mode='lines', name='Future Forecasted Sales', line=dict(color=mape_color)))
            fig_forecast.update_layout(title='Forecasted Sales Data', xaxis_title='Date', yaxis_title='Sales')
            st.plotly_chart(fig_forecast, use_container_width=True)
            st.write(f"MAPE score: {mape}% - {interpretation}")

        # Text-only copy of the forecast table for display and for the
        # question-answering call below.
        df = dates_df(future_sales_growth)

        col_table = st.columns(2)
        with col_table[0]:
            col_table[0].subheader(f"Forecasted sales in the next {period} days")
            col_table[0].write(df)

        with col_table[1]:
            # Previously the result of st.subheader() was assigned back into
            # col_table[1], overwriting the column handle.
            st.subheader("Question-Answering")
            with st.form("question_form"):
                question = st.text_input('Ask a Question about the Forecasted Data', placeholder="What is the total sales in the month of December?")
                query_button = st.form_submit_button(label='Generate Answer')
                if query_button or question:
                    answer = get_converted_answer(df, question)
                    if answer is not None:
                        st.write("The answer is:", answer)
                    else:
                        st.write("Answer is not found in table")
        st.session_state.forecasted = True
408
+
409
+
410
# Hide Streamlit default style: inject a CSS rule that makes the page's
# <footer> element invisible.
hide_st_style = """
<style>
footer {visibility: hidden;}
</style>
"""
# unsafe_allow_html lets the raw <style> tag through instead of escaping it.
st.markdown(hide_st_style, unsafe_allow_html=True)