azulgarza committed on
Commit
56baf6d
1 Parent(s): 1233062

feat: add timenet table

Browse files
Files changed (2) hide show
  1. app.py +15 -1
  2. src/utils.py +25 -1
app.py CHANGED
@@ -123,6 +123,20 @@ def st_timenet_features():
123
  plot_closest_series(df, closest_ids[0]['id'], CATALOGUE)
124
  )
125
  st.header('Potential winner models')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  st.write(
127
  """
128
  This plot showcases the "win rate" of various predictive models.
@@ -132,7 +146,7 @@ def st_timenet_features():
132
  """
133
  )
134
  st.pyplot(
135
- plot_best_models_count(closest_ids, CATALOGUE)
136
  )
137
 
138
  if __name__ == "__main__":
 
123
  plot_closest_series(df, closest_ids[0]['id'], CATALOGUE)
124
  )
125
  st.header('Potential winner models')
126
+
127
+ fig, summary_df = plot_best_models_count(closest_ids, CATALOGUE)
128
+ st.subheader("Model performance analysis for similar time series")
129
+ st.write(
130
+ """
131
+ This section presents a table that illustrates the average scaled performance of the closest series to your uploaded series. The performance metric used here is compared against a Naive forecast model. A Naive forecast model is a simple prediction method that assumes the future will be the same as the present. This comparison allows you to understand how well more sophisticated models perform relative to this basic prediction strategy.
132
+
133
+ In other words, the table shows the performance of various models when applied to time series that are highly similar to the one you uploaded, relative to a simple model that only projects the current values into the future. This allows you to assess what kind of improvements you might expect if you were to employ these more sophisticated models on your own series.
134
+
135
+ By using this information, you can make more informed decisions about which models are likely to provide valuable insights for your particular data set. It also offers the opportunity to assess and explore the potential benefits of using different forecasting models for your data.
136
+ """
137
+ )
138
+ st.dataframe(summary_df)
139
+ st.subheader("Winner models")
140
  st.write(
141
  """
142
  This plot showcases the "win rate" of various predictive models.
 
146
  """
147
  )
148
  st.pyplot(
149
+ fig
150
  )
151
 
152
  if __name__ == "__main__":
src/utils.py CHANGED
@@ -58,6 +58,24 @@ def get_closest_ids(x: list, top_k: int, index_pinecone):
58
  )
59
  return query_response['matches']
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  def plot_best_models_count(ids, catalogue):
62
  uids = [x['id'] for x in ids]
63
  file_evaluations = catalogue['file_evaluation'].loc[uids].unique()
@@ -70,10 +88,16 @@ def plot_best_models_count(ids, catalogue):
70
  values='value'
71
  ).reset_index()
72
  models = eval_df.drop(columns=['unique_id', 'metric']).columns
 
 
 
 
 
 
73
  eval_df['BestModel'] = eval_df[models].idxmin(axis=1)
74
  #eval_df = eval_df.groupby(['BestModel', 'metric']).size().rename('n').reset_index()
75
  fig = sns.catplot(eval_df.query('metric != "mase"'), y='BestModel', kind='count', col='metric')
76
- return fig
77
 
78
  def plot_closest_series(Y_df, id, catalogue):
79
  # leer archivo de file_timenet y hacer el plot
 
58
  )
59
  return query_response['matches']
60
 
61
def highlight_smallest(s, nsmallest=3):
    """Return per-cell CSS that highlights the smallest values of a Series.

    Intended to be passed to ``DataFrame.style.apply`` so that it is called
    once per column (or row) of the table being styled.

    Values are ranked in ascending order with ties sharing the lowest rank
    (``method="min"``). Rank 1 is coloured light green, rank 2 light blue and
    rank 3 light pink; every other cell gets an empty style string.

    Args:
        s: pandas Series of comparable values.
        nsmallest: how many ranks to highlight. Only three colours are
            defined, so values above 3 are capped instead of raising
            ``IndexError``.

    Returns:
        A list of CSS strings, one per element of ``s`` and in the same order.
    """
    colors = ['lightgreen', 'lightblue', 'lightpink']

    # Never index past the palette, even if the caller asks for more ranks.
    n_styled = min(nsmallest, len(colors))

    # Keep ranks as floats: missing values rank as NaN, and casting those to
    # int would raise. NaN also fails the range comparison below, so cells
    # with missing values are simply left unstyled.
    ranks = s.rank(method="min")

    return [
        'background-color: {};'.format(colors[int(rank) - 1])
        if 1 <= rank <= n_styled
        else ''
        for rank in ranks
    ]
78
+
79
  def plot_best_models_count(ids, catalogue):
80
  uids = [x['id'] for x in ids]
81
  file_evaluations = catalogue['file_evaluation'].loc[uids].unique()
 
88
  values='value'
89
  ).reset_index()
90
  models = eval_df.drop(columns=['unique_id', 'metric']).columns
91
+ # compute relative metric
92
+ for model in models:
93
+ eval_df[model] = eval_df[model] / eval_df['Naive']
94
+ summary_df = eval_df.groupby('metric')[models].median().T
95
+ summary_df = summary_df[summary_df.index != 'Naive'].sort_values('mae')
96
+ summary_df = summary_df.style.apply(highlight_smallest, nsmallest=3, axis=0)
97
  eval_df['BestModel'] = eval_df[models].idxmin(axis=1)
98
  #eval_df = eval_df.groupby(['BestModel', 'metric']).size().rename('n').reset_index()
99
  fig = sns.catplot(eval_df.query('metric != "mase"'), y='BestModel', kind='count', col='metric')
100
+ return fig, summary_df
101
 
102
  def plot_closest_series(Y_df, id, catalogue):
103
  # leer archivo de file_timenet y hacer el plot