alexander-lazarin committed on
Commit
62d6afe
1 Parent(s): dd9c62c

Initial commit

Browse files
Files changed (2) hide show
  1. app.py +154 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from prophet import Prophet
4
+ import plotly.graph_objs as go
5
+ import re
6
+
7
# Lookup from lowercase Russian month names to zero-padded month numbers
# ("01" .. "12"), generated from the names listed in calendar order.
russian_months = {
    month_name: f"{month_number:02d}"
    for month_number, month_name in enumerate(
        (
            "январь",
            "февраль",
            "март",
            "апрель",
            "май",
            "июнь",
            "июль",
            "август",
            "сентябрь",
            "октябрь",
            "ноябрь",
            "декабрь",
        ),
        start=1,
    )
}
22
+
23
def _parse_counts(series):
    """Convert a column of textual counts to floats, treating "<1" as 0."""
    # Going through str first keeps this working when pandas has already
    # parsed the column as numbers (a numeric Series has no .str accessor).
    text = series.astype(str)
    for old, new in (('<1', '0'), (' ', ''), (',', '.')):
        text = text.str.replace(old, new, regex=False)
    return text.astype(float)


def read_and_process_file(file):
    """Load an uploaded CSV export as a date-indexed DataFrame.

    Two layouts are recognised:

    * "Google": the second line of the file is blank. The first two lines
      are skipped and the rest is read as a comma-separated table.
    * "Yandex": anything else. The file is read as a semicolon-separated
      table keeping columns 0 and 2. Weekly dates are parsed as dd.mm.yyyy;
      monthly dates written as "<Russian month name> <year>" are rewritten
      to yyyy-MM-01.

    In both layouts a value of "<1" is treated as 0.

    Args:
        file: Path to the CSV file, or an object whose ``name`` attribute
            holds that path.

    Returns:
        A tuple ``(data, period_type, period_col)``: the DataFrame indexed
        by the parsed dates, ``"Week"`` or ``"Month"``, and the name of the
        original date column.

    Raises:
        ValueError: If the file has fewer than two lines.
        KeyError: If a monthly Yandex date uses a month name that is not in
            ``russian_months``.
    """
    # Accept both a plain path and a file-like wrapper exposing ``.name``
    path = getattr(file, 'name', file)

    # Read once; explicit UTF-8 matches what pd.read_csv assumes below and
    # does not depend on the platform's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    if len(lines) < 2:
        raise ValueError('The uploaded file must contain at least two lines.')

    # "Неделя" or "Week" (case-insensitive) in the first three lines marks
    # weekly data; everything else is treated as monthly.
    header_text = ''.join(lines[:3]).lower()
    if any(word in header_text for word in ("неделя", "week")):
        period_type = "Week"
    else:
        period_type = "Month"

    if lines[1].strip() == '':
        # Google layout
        data = pd.read_csv(path, skiprows=2)
        # Replace any occurrences of "<1" with 0; columns that pandas already
        # parsed as numbers are left untouched.
        for column in data.columns[1:]:
            if not pd.api.types.is_numeric_dtype(data[column]):
                data[column] = _parse_counts(data[column])
    else:
        # Yandex layout
        data = pd.read_csv(path, sep=';', skiprows=0, usecols=[0, 2])
        date_col, value_col = data.columns[0], data.columns[1]
        if period_type == "Month":
            # Replace Russian months with yyyy-MM format, then pin to day 01
            data[date_col] = data[date_col].apply(
                lambda x: re.sub(
                    r'(\w+)\s(\d{4})',
                    lambda m: f'{m.group(2)}-{russian_months[m.group(1).lower()]}',
                    x,
                ) + '-01'
            )
        if period_type == "Week":
            data[date_col] = pd.to_datetime(data[date_col], format="%d.%m.%Y")
        # Replace "<1" with 0, drop spaces and switch decimal commas to dots
        data[value_col] = _parse_counts(data[value_col])

    # Process the date column and set it as the index
    period_col = data.columns[0]
    data[period_col] = pd.to_datetime(data[period_col])
    data.set_index(period_col, inplace=True)

    return data, period_type, period_col
60
+
61
def forecast_time_series(file):
    """Forecast two years ahead for an uploaded series and package the results.

    The file is parsed with ``read_and_process_file``, a Prophet model is
    fitted on the first value column, and 24 monthly or 104 weekly periods
    are predicted. The year-over-year figure compares the sum of the first
    forecast year with the sum of the last ``year`` observed periods.

    Args:
        file: The uploaded file as accepted by ``read_and_process_file``,
            or ``None`` when there is no upload.

    Returns:
        A tuple ``(figure, yoy_text, csv_path)``: a Plotly figure with the
        observed series, forecast and confidence band; a text line with the
        YoY change; and the path of a CSV combining observed and forecast
        values. ``(None, '', None)`` is returned when ``file`` is ``None``.
    """
    import os
    import tempfile

    # The upload's change event can deliver None (e.g. when the file is
    # cleared); return empty outputs instead of failing on a missing path.
    if file is None:
        return None, '', None

    data, period_type, period_col = read_and_process_file(file)

    if period_type == "Month":
        year = 12
        freq = "MS"
    else:
        year = 52
        # Step in whole weeks from the last observed date so forecast dates
        # keep the history's weekday ("W" would snap them to Sundays).
        freq = "7D"
    n_periods = year * 2  # Number of periods to forecast

    # Prepare data for Prophet
    df = data.reset_index().rename(columns={period_col: 'ds', data.columns[0]: 'y'})

    # Fit the Prophet model
    model = Prophet()
    model.fit(df)

    # Create future dataframe
    future = model.make_future_dataframe(periods=n_periods, freq=freq)

    # Forecasting
    forecast = model.predict(future)

    # Calculate the YoY change: first forecast year vs. last observed year
    sum_last_year_original = df['y'].iloc[-year:].sum()
    sum_first_year_forecast = forecast['yhat'].iloc[-n_periods:].iloc[:year].sum()
    if sum_last_year_original:
        yoy_change = (sum_first_year_forecast - sum_last_year_original) / sum_last_year_original
        yoy_text = f'Year-over-Year Change in Sum of Values: {yoy_change:.2%}'
    else:
        # A relative change against a zero baseline is undefined
        yoy_text = 'Year-over-Year Change in Sum of Values: n/a (last observed year sums to zero)'

    # Create an interactive plot with Plotly
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data.index, y=data.iloc[:, 0], mode='lines', name='Observed'))
    fig.add_trace(go.Scatter(x=forecast['ds'], y=forecast['yhat'], mode='lines', name='Forecast', line=dict(color='red')))
    fig.add_trace(go.Scatter(
        x=forecast['ds'],
        y=forecast['yhat_lower'],
        fill=None,
        mode='lines',
        line=dict(color='pink'),
        name='Lower CI'
    ))
    # fill='tonexty' shades the area between this trace and the lower bound
    fig.add_trace(go.Scatter(
        x=forecast['ds'],
        y=forecast['yhat_upper'],
        fill='tonexty',
        mode='lines',
        line=dict(color='pink'),
        name='Upper CI'
    ))
    fig.update_layout(
        title='Observed Time Series and Forecast with Confidence Intervals',
        xaxis_title='Date',
        yaxis_title='Values',
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1.02,
            xanchor='right',
            x=1
        ),
        hovermode='x unified'
    )

    # Combine original data and forecast data into one DataFrame for export
    combined_df = pd.concat([data, forecast.set_index('ds')[['yhat', 'yhat_lower', 'yhat_upper']]], axis=1)
    # Write into a fresh temporary directory so separate runs do not overwrite
    # each other's export; the file name itself stays the same.
    combined_file = os.path.join(tempfile.mkdtemp(), 'combined_data.csv')
    combined_df.to_csv(combined_file)

    # Return plot, YoY change, and file path for export
    return fig, yoy_text, combined_file
133
+
134
# Build the Gradio Blocks page: one upload widget whose changes run the
# forecast, and three outputs (chart, YoY text, downloadable CSV), each
# placed in its own row.
with gr.Blocks(theme=gr.themes.Monochrome()) as interface:
    gr.Markdown("# Time Series Forecasting")
    gr.Markdown("Upload a CSV file with a time series to forecast the next 2 years and see the YoY % change. Download the combined original and forecast data.")

    # Input
    with gr.Row():
        file_input = gr.File(label="Upload Time Series CSV")

    # Outputs, in the order forecast_time_series returns them
    with gr.Row():
        plot_output = gr.Plot(label="Time Series + Forecast Chart")

    with gr.Row():
        yoy_output = gr.Text(label="YoY % Change")

    with gr.Row():
        csv_output = gr.File(label="Download Combined Data CSV")

    # Re-run the forecast whenever the uploaded file changes
    file_input.change(
        fn=forecast_time_series,
        inputs=file_input,
        outputs=[plot_output, yoy_output, csv_output],
    )

# Start serving the page
interface.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ plotly
4
+ prophet