azulgarza committed on
Commit
b22704e
1 Parent(s): e676cab

feat: add conformal and ensembles

Browse files
Files changed (1) hide show
  1. src/utils.py +51 -15
src/utils.py CHANGED
@@ -10,7 +10,6 @@ from statsforecast.models import Naive
10
 
11
  openai.api_key = os.environ['OPENAI_API_KEY']
12
 
13
-
14
  class ChatGPTForecast:
15
 
16
  def __init__(self):
@@ -26,7 +25,6 @@ class ChatGPTForecast:
26
  - give more weight to the most recent observations
27
  - consider trend
28
  - consider seasonality
29
- - values should lie between 0 and {len(self.bins) - 1}, please be sure to do this
30
  """
31
 
32
  def tokenize_time_series(self, series):
@@ -79,23 +77,59 @@ class ChatGPTForecast:
79
  series = [self.bins[i] + bin_width / 2 for i in indices]
80
  return series
81
 
82
- def forward(self, series, seasonality, h):
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  series_tokenized = self.tokenize_time_series(series)
 
84
  prompt = f"""
85
  {self.prompt}-consider {seasonality} as seasonality
86
  - just print {h} steps ahead
 
87
 
88
 
89
  this is the series: {series_tokenized}
90
  """
91
  response = openai.ChatCompletion.create(
92
  model="gpt-3.5-turbo",
93
- messages=[{"role": "user", "content": prompt}]
 
94
  )
95
- output_gpt = response['choices'][0]['message']['content']
96
- output_gpt = self.extend_string(output_gpt, h)
97
- output_gpt = ' '.join(f'{max(min(int(x), len(self.bins) - 1), 0)}' for x in output_gpt.split())
98
- return self.decode_time_series(output_gpt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  def compute_ds_future(self, ds, fh):
101
  ds_ = pd.to_datetime(ds)
@@ -111,7 +145,7 @@ class ChatGPTForecast:
111
  ds_future = list(map(str, ds_future))
112
  return ds_future, freq
113
 
114
- def forecast(self, df, h, input_size):
115
  df = df.copy()
116
  scaler = MinMaxScaler()
117
  df['y'] = scaler.fit_transform(df[['y']])
@@ -120,12 +154,14 @@ class ChatGPTForecast:
120
  sf = StatsForecast(models=[Naive()], freq='D')
121
  fcst_df = sf.forecast(df=df, h=h)
122
  fcst_df['ds'] = ds_future
123
- fcst_df['ChatGPT-3.5-Turbo'] = self.forward(df['y'].values[-input_size:], freq, h)[-h:]
 
 
 
 
 
124
 
125
- for col in ['Naive', 'ChatGPT-3.5-Turbo']:
126
  fcst_df[col] = scaler.inverse_transform(fcst_df[[col]])
127
  df['y'] = scaler.inverse_transform(df[['y']])
128
- return sf.plot(df, fcst_df, max_insample_length=3 * h)
129
-
130
-
131
-
 
10
 
11
  openai.api_key = os.environ['OPENAI_API_KEY']
12
 
 
13
  class ChatGPTForecast:
14
 
15
  def __init__(self):
 
25
  - give more weight to the most recent observations
26
  - consider trend
27
  - consider seasonality
 
28
  """
29
 
30
  def tokenize_time_series(self, series):
 
77
  series = [self.bins[i] + bin_width / 2 for i in indices]
78
  return series
79
 
80
+ def find_min_max(self, string_of_integers):
81
+ # Split the string into a list of strings
82
+ str_list = string_of_integers.split()
83
+
84
+ # Convert the list of strings into a list of integers
85
+ int_list = [int(i) for i in str_list]
86
+
87
+ # Find the minimum and maximum values
88
+ min_value = min(int_list)
89
+ max_value = max(int_list)
90
+
91
+ return min_value, max_value
92
+
93
+ def call_openai(self, series, seasonality, h, n_forecasts):
94
  series_tokenized = self.tokenize_time_series(series)
95
+ min_val, max_val = self.find_min_max(series_tokenized)
96
  prompt = f"""
97
  {self.prompt}-consider {seasonality} as seasonality
98
  - just print {h} steps ahead
99
+ - values should be integers between {min_val} and {max_val}, please be sure to do this
100
 
101
 
102
  this is the series: {series_tokenized}
103
  """
104
  response = openai.ChatCompletion.create(
105
  model="gpt-3.5-turbo",
106
+ messages=[{"role": "user", "content": prompt}],
107
+ n=n_forecasts
108
  )
109
+ choices = response['choices']
110
+ outputs = []
111
+ for choice in choices:
112
+ output_gpt = choice['message']['content']
113
+ if len(output_gpt.split()) < 2:
114
+ continue
115
+ output_gpt = self.extend_string(output_gpt, h)
116
+ output_gpt = ' '.join(f'{max(min(int(x), len(self.bins) - 1), 0)}' for x in output_gpt.split())
117
+ outputs.append(self.decode_time_series(output_gpt))
118
+ outputs = np.vstack(outputs)
119
+ return outputs
120
+
121
+ def forward(self, series, seasonality, h, n_forecasts):
122
+ outputs = self.call_openai(series, seasonality, h, n_forecasts)
123
+ outputs = np.median(outputs, axis=0)
124
+ return outputs
125
+
126
+ def conformal_intervals(self, series, seasonality, h, n_forecasts):
127
+ series_train, series_test = series[:-h], series[-h:]
128
+ outputs = self.call_openai(series_train, seasonality, h, n_forecasts)
129
+ errors = np.abs(outputs - series_test)
130
+ lower_levels = np.quantile(errors, q=0.05, axis=0)
131
+ upper_levels = np.quantile(errors, q=0.095, axis=0)
132
+ return lower_levels, upper_levels
133
 
134
  def compute_ds_future(self, ds, fh):
135
  ds_ = pd.to_datetime(ds)
 
145
  ds_future = list(map(str, ds_future))
146
  return ds_future, freq
147
 
148
+ def forecast(self, df, h, input_size, n_forecasts=10):
149
  df = df.copy()
150
  scaler = MinMaxScaler()
151
  df['y'] = scaler.fit_transform(df[['y']])
 
154
  sf = StatsForecast(models=[Naive()], freq='D')
155
  fcst_df = sf.forecast(df=df, h=h)
156
  fcst_df['ds'] = ds_future
157
+ fcst_df['ChatGPT_3.5_Turbo'] = self.forward(df['y'].values[-input_size:], freq, h, n_forecasts)[-h:]
158
+
159
+ # add prediction intervals
160
+ lower_levels, upper_levels = self.conformal_intervals(df['y'].values[-(input_size + h):], freq, h, n_forecasts)
161
+ fcst_df['ChatGPT_3.5_Turbo-lo-90'] = fcst_df['ChatGPT_3.5_Turbo'] - lower_levels
162
+ fcst_df['ChatGPT_3.5_Turbo-hi-90'] = fcst_df['ChatGPT_3.5_Turbo'] + upper_levels
163
 
164
+ for col in ['Naive', 'ChatGPT_3.5_Turbo', 'ChatGPT_3.5_Turbo-lo-90', 'ChatGPT_3.5_Turbo-hi-90']:
165
  fcst_df[col] = scaler.inverse_transform(fcst_df[[col]])
166
  df['y'] = scaler.inverse_transform(df[['y']])
167
+ return sf.plot(df, fcst_df, max_insample_length=3 * h, level=[90])