alexander-lazarin
committed on
Commit
•
62d6afe
1
Parent(s):
dd9c62c
Initial commit
Browse files- app.py +154 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
from prophet import Prophet
|
4 |
+
import plotly.graph_objs as go
|
5 |
+
import re
|
6 |
+
|
7 |
+
# Lookup table: lower-case Russian month name -> zero-padded month number
# ("январь" -> "01" ... "декабрь" -> "12"). Names are listed in calendar
# order, so the number is simply the 1-based position in the tuple.
russian_months = {
    month_name: f"{month_number:02d}"
    for month_number, month_name in enumerate(
        (
            "январь",
            "февраль",
            "март",
            "апрель",
            "май",
            "июнь",
            "июль",
            "август",
            "сентябрь",
            "октябрь",
            "ноябрь",
            "декабрь",
        ),
        start=1,
    )
}
|
22 |
+
|
23 |
+
def read_and_process_file(file):
    """Load an uploaded trends CSV export into a date-indexed DataFrame.

    Two layouts are recognised, told apart by the second line of the file:

    * blank second line: handled as the "Google" layout - comma separated,
      the first two lines are skipped and the header is on the third line;
    * anything else: handled as the "Yandex" layout - semicolon separated,
      header on the first line, only columns 0 and 2 are kept.

    The period granularity is "Week" when any of the first three lines
    contains "неделя" or "week" (case-insensitive), otherwise "Month".

    Args:
        file: Object whose ``name`` attribute is the path of the CSV file.

    Returns:
        tuple: ``(data, period_type, period_col)`` where ``data`` is the
        DataFrame indexed by the parsed period column, ``period_type`` is
        ``"Week"`` or ``"Month"`` and ``period_col`` is the name of the
        period column as it appeared in the file.

    Raises:
        ValueError: If the file has fewer than two lines, so the layout
            cannot be determined.
        KeyError: If a monthly label in the Yandex layout uses a month name
            that is missing from ``russian_months``.
    """
    # Read the file once. The encoding is explicit because the content may be
    # Cyrillic and pandas decodes the same file as UTF-8 below; "utf-8-sig"
    # additionally tolerates a leading byte-order mark.
    with open(file.name, 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()

    if len(lines) < 2:
        raise ValueError(
            'The uploaded file must contain at least two lines to detect its format.'
        )

    # Check for "Неделя" or "Week" (case-insensitive) in the first three lines
    header_text = ''.join(lines[:3]).lower()
    if any(word in header_text for word in ("неделя", "week")):
        period_type = "Week"
    else:
        period_type = "Month"

    # A blank second line marks the Google layout
    is_google = lines[1].strip() == ''
    if is_google:
        data = pd.read_csv(file.name, skiprows=2)
    else:
        data = pd.read_csv(file.name, sep=';', skiprows=0, usecols=[0, 2])

    period_col = data.columns[0]
    value_col = data.columns[1]

    if is_google:
        dates = pd.to_datetime(data[period_col])
    elif period_type == "Month":
        # Turn "<month name> <yyyy>" into "yyyy-MM-01" before parsing
        def month_to_iso(match):
            return f'{match.group(2)}-{russian_months[match.group(1).lower()]}'

        iso_labels = data[period_col].apply(
            lambda label: re.sub(r'(\w+)\s(\d{4})', month_to_iso, label) + '-01'
        )
        dates = pd.to_datetime(iso_labels)
    else:
        dates = pd.to_datetime(data[period_col], format="%d.%m.%Y")

    # Normalise the value column for both layouts: "<1" becomes 0, spaces used
    # as thousands separators are dropped and a decimal comma becomes a dot.
    # Going through str first keeps this working when pandas has already
    # parsed the column as numbers.
    cleaned = (
        data[value_col]
        .astype(str)
        .str.replace('<1', '0', regex=False)
        .str.replace(' ', '', regex=False)
        .str.replace(',', '.', regex=False)
    )
    data[value_col] = cleaned.astype(float)

    # Use the parsed dates as the index
    data[period_col] = dates
    data.set_index(period_col, inplace=True)

    return data, period_type, period_col
|
60 |
+
|
61 |
+
def _build_forecast_figure(data, forecast):
    """Plot the observed series, the forecast line and its lower/upper band."""
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data.index, y=data.iloc[:, 0], mode='lines', name='Observed'))
    fig.add_trace(go.Scatter(x=forecast['ds'], y=forecast['yhat'], mode='lines', name='Forecast', line=dict(color='red')))
    fig.add_trace(go.Scatter(
        x=forecast['ds'],
        y=forecast['yhat_lower'],
        fill=None,
        mode='lines',
        line=dict(color='pink'),
        name='Lower CI'
    ))
    # 'tonexty' shades the area between this trace and the previous (lower) one
    fig.add_trace(go.Scatter(
        x=forecast['ds'],
        y=forecast['yhat_upper'],
        fill='tonexty',
        mode='lines',
        line=dict(color='pink'),
        name='Upper CI'
    ))
    fig.update_layout(
        title='Observed Time Series and Forecast with Confidence Intervals',
        xaxis_title='Date',
        yaxis_title='Values',
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1.02,
            xanchor='right',
            x=1
        ),
        hovermode='x unified'
    )
    return fig


def forecast_time_series(file):
    """Forecast two years ahead for an uploaded series and export the result.

    The file is parsed with ``read_and_process_file``, a default ``Prophet``
    model is fitted to the first value column, and the forecast is compared
    with the last observed year.

    Args:
        file: Uploaded file object exposing the CSV path as ``file.name``, or
            ``None`` when no file is selected.

    Returns:
        tuple: ``(figure, yoy_text, csv_path)`` - the Plotly figure, a text
        line with the year-over-year change of the summed values (first
        forecast year vs. last observed year), and the path of a CSV that
        holds the original data next to ``yhat``, ``yhat_lower`` and
        ``yhat_upper``. All three are empty (``None``, ``''``, ``None``)
        when ``file`` is ``None``.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    # Nothing to forecast when the upload was cleared; return empty outputs
    # instead of failing on ``file.name``.
    if file is None:
        return None, '', None

    data, period_type, period_col = read_and_process_file(file)

    if period_type == "Month":
        year = 12
        freq = "MS"
    else:
        year = 52
        # Step exactly seven days from the last observed date. "W" is anchored
        # on Sundays, which would shift the future dates whenever the observed
        # weeks start on another weekday.
        freq = "7D"
    n_periods = year * 2  # Number of periods to forecast (two years)

    # Prepare data for Prophet: it expects the columns 'ds' (date) and 'y'
    df = data.reset_index().rename(columns={period_col: 'ds', data.columns[0]: 'y'})

    # Fit the Prophet model
    model = Prophet()
    model.fit(df)

    # Forecast over the history plus the future periods
    future = model.make_future_dataframe(periods=n_periods, freq=freq)
    forecast = model.predict(future)

    # Calculate the YoY change: first forecast year vs. last observed year
    sum_last_year_original = df['y'].iloc[-year:].sum()
    first_future_row = len(forecast) - n_periods
    sum_first_year_forecast = forecast['yhat'].iloc[first_future_row:first_future_row + year].sum()
    if sum_last_year_original == 0:
        # A relative change against a zero baseline is undefined
        yoy_text = 'Year-over-Year Change in Sum of Values: n/a (last observed year sums to zero)'
    else:
        yoy_change = (sum_first_year_forecast - sum_last_year_original) / sum_last_year_original
        yoy_text = f'Year-over-Year Change in Sum of Values: {yoy_change:.2%}'

    fig = _build_forecast_figure(data, forecast)

    # Combine original data and forecast data into one DataFrame for export.
    # Each call writes into its own temporary directory so that simultaneous
    # requests do not overwrite each other's file; the file name is unchanged.
    combined_df = pd.concat([data, forecast.set_index('ds')[['yhat', 'yhat_lower', 'yhat_upper']]], axis=1)
    combined_file = os.path.join(tempfile.mkdtemp(prefix='forecast_'), 'combined_data.csv')
    combined_df.to_csv(combined_file)

    # Return plot, YoY change, and file path for export
    return fig, yoy_text, combined_file
|
133 |
+
|
134 |
+
# Build the Gradio Blocks UI: one upload widget feeding a chart, a text line
# with the YoY change and a downloadable CSV, each in its own row.
with gr.Blocks(theme=gr.themes.Monochrome()) as interface:
    gr.Markdown("# Time Series Forecasting")
    gr.Markdown(
        "Upload a CSV file with a time series to forecast the next 2 years "
        "and see the YoY % change. "
        "Download the combined original and forecast data."
    )

    with gr.Row():
        file_input = gr.File(label="Upload Time Series CSV")

    with gr.Row():
        plot_output = gr.Plot(label="Time Series + Forecast Chart")

    with gr.Row():
        yoy_output = gr.Text(label="YoY % Change")

    with gr.Row():
        csv_output = gr.File(label="Download Combined Data CSV")

    # Re-run the forecast whenever the uploaded file changes
    file_input.change(
        fn=forecast_time_series,
        inputs=file_input,
        outputs=[plot_output, yoy_output, csv_output],
    )

# Start the app
interface.launch(debug=True)
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
pandas
|
3 |
+
plotly
|
4 |
+
prophet
|