feat: add timenet table
Files changed:
- app.py (+15 -1)
- src/utils.py (+25 -1)
app.py CHANGED

```diff
@@ -123,6 +123,20 @@ def st_timenet_features():
         plot_closest_series(df, closest_ids[0]['id'], CATALOGUE)
     )
     st.header('Potential winner models')
+
+    fig, summary_df = plot_best_models_count(closest_ids, CATALOGUE)
+    st.subheader("Model performance analysis for similar time series")
+    st.write(
+        """
+        This section presents a table that illustrates the average scaled performance of the closest series to your uploaded series. The performance metric used here is compared against a Naive forecast model. A Naive forecast model is a simple prediction method that assumes the future will be the same as the present. This comparison allows you to understand how well more sophisticated models perform relative to this basic prediction strategy.
+
+        In other words, the table shows the performance of various models when applied to time series that are highly similar to the one you uploaded, relative to a simple model that only projects the current values into the future. This allows you to assess what kind of improvements you might expect if you were to employ these more sophisticated models on your own series.
+
+        By using this information, you can make more informed decisions about which models are likely to provide valuable insights for your particular data set. It also offers the opportunity to assess and explore the potential benefits of using different forecasting models for your data.
+        """
+    )
+    st.dataframe(summary_df)
+    st.subheader("Winner models")
     st.write(
         """
         This plot showcases the "win rate" of various predictive models.
@@ -132,7 +146,7 @@ def st_timenet_features():
         """
     )
     st.pyplot(
-        plot_best_models_count(closest_ids, CATALOGUE)
+        fig
    )
 
 if __name__ == "__main__":
```
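On the app side, `plot_best_models_count` now returns a (figure, styled table) pair; the table is rendered with `st.dataframe` and the figure is passed to the existing `st.pyplot` call. Here is a minimal sketch of that pattern in isolation, with stand-in data, model names, and styling; nothing below comes from the real catalogue:

```python
import pandas as pd
import streamlit as st
from matplotlib import pyplot as plt

# Stand-in for the (fig, summary_df) pair returned by plot_best_models_count.
# Scores are hypothetical relative errors; values below 1.0 beat the Naive baseline.
scores = pd.DataFrame(
    {"mae": [0.82, 0.91, 1.10], "smape": [0.78, 0.95, 1.05]},
    index=["ModelA", "ModelB", "ModelC"],
)
summary_df = scores.style.highlight_min(color="lightgreen")

fig, ax = plt.subplots()
scores["mae"].plot.barh(ax=ax)  # placeholder for the seaborn catplot

st.subheader("Model performance analysis for similar time series")
st.dataframe(summary_df)  # st.dataframe renders pandas Styler formatting
st.subheader("Winner models")
st.pyplot(fig)
```

Returning the figure and the table from one call keeps the evaluation work in a single place, so the Streamlit script body only handles layout.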
src/utils.py CHANGED

```diff
@@ -58,6 +58,24 @@ def get_closest_ids(x: list, top_k: int, index_pinecone):
     )
     return query_response['matches']
 
+def highlight_smallest(s, nsmallest=3):
+    # Define colors
+    colors = ['lightgreen', 'lightblue', 'lightpink']
+
+    # Rank data and find the nsmallest
+    ranks = s.rank(method="min").astype(int)
+    smallest = ranks.isin(ranks.nsmallest(nsmallest))
+
+    # Initialize an empty string for the styles
+    attr = ['' for _ in s]
+
+    # Apply styles to the nsmallest
+    for i in range(1, nsmallest+1):
+        mask = ranks == i
+        attr = ['background-color: {};'.format(colors[i-1]) if v else a for v, a in zip(mask, attr)]
+
+    return attr
+
 def plot_best_models_count(ids, catalogue):
     uids = [x['id'] for x in ids]
     file_evaluations = catalogue['file_evaluation'].loc[uids].unique()
@@ -70,10 +88,16 @@ def plot_best_models_count(ids, catalogue):
         values='value'
     ).reset_index()
     models = eval_df.drop(columns=['unique_id', 'metric']).columns
+    # compute relative metric
+    for model in models:
+        eval_df[model] = eval_df[model] / eval_df['Naive']
+    summary_df = eval_df.groupby('metric')[models].median().T
+    summary_df = summary_df[summary_df.index != 'Naive'].sort_values('mae')
+    summary_df = summary_df.style.apply(highlight_smallest, nsmallest=3, axis=0)
     eval_df['BestModel'] = eval_df[models].idxmin(axis=1)
     #eval_df = eval_df.groupby(['BestModel', 'metric']).size().rename('n').reset_index()
     fig = sns.catplot(eval_df.query('metric != "mase"'), y='BestModel', kind='count', col='metric')
-    return fig
+    return fig, summary_df
 
 def plot_closest_series(Y_df, id, catalogue):
     # read the file_timenet file and make the plot
```
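To make the new scaling step concrete, here is a small walkthrough of the same operations on toy data. The column names mirror the committed code, but the scores are invented. A Naive forecast simply carries the last observed value forward, so after the division a value of 0.5 reads as "half the error of that baseline":

```python
import pandas as pd

# Long-format evaluation: one row per (series, metric), one column per model.
# Scores are hypothetical raw errors; 'Naive' is the baseline column.
eval_df = pd.DataFrame({
    'unique_id': ['s1', 's1', 's2', 's2'],
    'metric':    ['mae', 'smape', 'mae', 'smape'],
    'ModelA':    [1.0, 0.20, 1.0, 0.40],
    'Naive':     [2.0, 0.25, 2.0, 0.50],
})

models = eval_df.drop(columns=['unique_id', 'metric']).columns
for model in models:
    eval_df[model] = eval_df[model] / eval_df['Naive']  # scale by the baseline

summary_df = eval_df.groupby('metric')[models].median().T  # rows: models, cols: metrics
summary_df = summary_df[summary_df.index != 'Naive'].sort_values('mae')
print(summary_df)
#         mae  smape
# ModelA  0.5    0.8
```

One thing to watch with the in-place loop: once the 'Naive' column has been divided by itself it becomes all ones, so any model column processed after it would be scaled by 1.0 instead of the true baseline. In this sketch 'Naive' is the last column, so the result is correct; dividing by a saved copy (e.g. `naive = eval_df['Naive'].copy()`) would remove the ordering dependence entirely.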
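The new `highlight_smallest` helper is a column-wise pandas Styler function: it ranks a column and returns one CSS string per cell, shading the three smallest values green, blue, and pink. Below is a self-contained demo on made-up numbers; the sketch drops the `smallest` mask that the committed version computes but never uses:

```python
import pandas as pd

def highlight_smallest(s, nsmallest=3):
    # One CSS string per cell; the i-th smallest value gets colors[i-1].
    colors = ['lightgreen', 'lightblue', 'lightpink']
    ranks = s.rank(method="min").astype(int)
    attr = ['' for _ in s]
    for i in range(1, nsmallest + 1):
        mask = ranks == i
        attr = ['background-color: {};'.format(colors[i - 1]) if v else a
                for v, a in zip(mask, attr)]
    return attr

# Hypothetical relative scores (model error / Naive error).
df = pd.DataFrame({'mae': [0.8, 1.2, 0.6, 0.9]},
                  index=['ModelA', 'ModelB', 'ModelC', 'ModelD'])
styled = df.style.apply(highlight_smallest, nsmallest=3, axis=0)
# 0.6 -> lightgreen, 0.8 -> lightblue, 0.9 -> lightpink, 1.2 -> unstyled
```

Because `rank(method="min")` gives tied values the same rank and skips the next one, a tie for the smallest value colors both cells green and may leave a later color unused.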