thesven committed on
Commit
05b4326
1 Parent(s): ce9e854
Files changed (3)
  1. .gitignore +5 -0
  2. .idea/.gitignore +8 -0
  3. app.py +312 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
+ BTC-Autoformer.ipynb
+ BTC_Dataset_to_huggingface.ipynb
+ huggingface_model.ipynb
+ app_backtest.py
+ .idea/*
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
+ # Default ignored files
+ /shelf/
+ /workspace.xml
+ # Editor-based HTTP Client requests
+ /httpRequests/
+ # Datasource local storage ignored files
+ /dataSources/
+ /dataSources.local.xml
app.py ADDED
@@ -0,0 +1,312 @@
+ # Standard library imports
+ from typing import Optional, Iterable
+
+ # Third-party library imports
+ from transformers import PretrainedConfig, AutoformerForPrediction
+ from functools import lru_cache, partial
+
+ import gradio as gr
+ import spaces
+ import torch
+ import pandas as pd
+
+ # External imports
+
+ # GluonTS imports
+ from gluonts.dataset.field_names import FieldName
+ from gluonts.transform import (
+     AddAgeFeature,
+     AddObservedValuesIndicator,
+     AddTimeFeatures,
+     AsNumpyArray,
+     Chain,
+     ExpectedNumInstanceSampler,
+     InstanceSplitter,
+     RemoveFields,
+     TestSplitSampler,
+     Transformation,
+     ValidationSplitSampler,
+     VstackFeatures,
+     RenameFields,
+ )
+ from gluonts.time_feature import time_features_from_frequency_str
+ from gluonts.transform.sampler import InstanceSampler
+
+ # Hugging Face Datasets imports
+ from datasets import Dataset, Features, Value, Sequence, load_dataset
+
+ # GluonTS Loader imports
+ from gluonts.dataset.loader import as_stacked_batches
+
+ import matplotlib.pyplot as plt
+ import matplotlib.dates as mdates
+ import numpy as np
+
+ def convert_to_pandas_period(date, freq):
+     return pd.Period(date, freq)
+
+ def transform_start_field(batch, freq):
+     batch["start"] = [convert_to_pandas_period(date, freq) for date in batch["start"]]
+     return batch
+
+ def create_transformation(freq: str, config: PretrainedConfig, prediction_length: int) -> Transformation:
+     remove_field_names = []
+     if config.num_static_real_features == 0:
+         remove_field_names.append(FieldName.FEAT_STATIC_REAL)
+     if config.num_dynamic_real_features == 0:
+         remove_field_names.append(FieldName.FEAT_DYNAMIC_REAL)
+     if config.num_static_categorical_features == 0:
+         remove_field_names.append(FieldName.FEAT_STATIC_CAT)
+
+     # a bit like torchvision.transforms.Compose
+     return Chain(
+         # step 1: remove static/dynamic fields if not specified
+         [RemoveFields(field_names=remove_field_names)]
+         # step 2: convert the data to NumPy (potentially not needed)
+         + (
+             [
+                 AsNumpyArray(
+                     field=FieldName.FEAT_STATIC_CAT,
+                     expected_ndim=1,
+                     dtype=int,
+                 )
+             ]
+             if config.num_static_categorical_features > 0
+             else []
+         )
+         + (
+             [
+                 AsNumpyArray(
+                     field=FieldName.FEAT_STATIC_REAL,
+                     expected_ndim=1,
+                 )
+             ]
+             if config.num_static_real_features > 0
+             else []
+         )
+         + [
+             AsNumpyArray(
+                 field=FieldName.TARGET,
+                 # we expect an extra dim for the multivariate case:
+                 expected_ndim=1 if config.input_size == 1 else 2,
+             ),
+             # step 3: handle the NaN's by filling in the target with zero
+             # and return the mask (which is in the observed values)
+             # true for observed values, false for NaN's
+             # the decoder uses this mask (no loss is incurred for unobserved values)
+             # see loss_weights inside the xxxForPrediction model
+             AddObservedValuesIndicator(
+                 target_field=FieldName.TARGET,
+                 output_field=FieldName.OBSERVED_VALUES,
+             ),
+             # step 4: add temporal features based on freq of the dataset
+             # and the desired prediction length
+             AddTimeFeatures(
+                 start_field=FieldName.START,
+                 target_field=FieldName.TARGET,
+                 output_field=FieldName.FEAT_TIME,
+                 time_features=time_features_from_frequency_str(freq),
+                 pred_length=prediction_length,
+             ),
+             # step 5: add another temporal feature (just a single number)
+             # tells the model where in its life the value of the time series is,
+             # sort of a running counter
+             AddAgeFeature(
+                 target_field=FieldName.TARGET,
+                 output_field=FieldName.FEAT_AGE,
+                 pred_length=prediction_length,
+                 log_scale=True,
+             ),
+             # step 6: vertically stack all the temporal features into the key FEAT_TIME
+             VstackFeatures(
+                 output_field=FieldName.FEAT_TIME,
+                 input_fields=[FieldName.FEAT_TIME, FieldName.FEAT_AGE]
+                 + (
+                     [FieldName.FEAT_DYNAMIC_REAL]
+                     if config.num_dynamic_real_features > 0
+                     else []
+                 ),
+             ),
+             # step 7: rename to match HuggingFace names
+             RenameFields(
+                 mapping={
+                     FieldName.FEAT_STATIC_CAT: "static_categorical_features",
+                     FieldName.FEAT_STATIC_REAL: "static_real_features",
+                     FieldName.FEAT_TIME: "time_features",
+                     FieldName.TARGET: "values",
+                     FieldName.OBSERVED_VALUES: "observed_mask",
+                 }
+             ),
+         ]
+     )
+
+ def create_instance_splitter(
+     config: PretrainedConfig,
+     mode: str,
+     prediction_length: int,
+     train_sampler: Optional[InstanceSampler] = None,
+     validation_sampler: Optional[InstanceSampler] = None,
+ ) -> Transformation:
+     assert mode in ["train", "validation", "test"]
+
+     instance_sampler = {
+         "train": train_sampler
+         or ExpectedNumInstanceSampler(
+             num_instances=1.0, min_future=prediction_length
+         ),
+         "validation": validation_sampler
+         or ValidationSplitSampler(min_future=prediction_length),
+         "test": TestSplitSampler(),
+     }[mode]
+
+     return InstanceSplitter(
+         target_field="values",
+         is_pad_field=FieldName.IS_PAD,
+         start_field=FieldName.START,
+         forecast_start_field=FieldName.FORECAST_START,
+         instance_sampler=instance_sampler,
+         past_length=config.context_length + max(config.lags_sequence),
+         future_length=prediction_length,
+         time_series_fields=["time_features", "observed_mask"],
+     )
+
+ def create_test_dataloader(
+     config: PretrainedConfig,
+     freq: str,
+     data: Dataset,
+     batch_size: int,
+     prediction_length: int,
+     **kwargs,
+ ):
+     PREDICTION_INPUT_NAMES = [
+         "past_time_features",
+         "past_values",
+         "past_observed_mask",
+         "future_time_features",
+     ]
+     if config.num_static_categorical_features > 0:
+         PREDICTION_INPUT_NAMES.append("static_categorical_features")
+
+     if config.num_static_real_features > 0:
+         PREDICTION_INPUT_NAMES.append("static_real_features")
+
+     transformation = create_transformation(freq, config, prediction_length)
+     transformed_data = transformation.apply(data, is_train=False)
+
+     # we create a Test Instance splitter which will sample the very last
+     # context window seen during training only for the encoder.
+     instance_sampler = create_instance_splitter(
+         config, "test", prediction_length=prediction_length
+     )
+
+     # we apply the transformations in test mode
+     testing_instances = instance_sampler.apply(transformed_data, is_train=False)
+
+     return as_stacked_batches(
+         testing_instances,
+         batch_size=batch_size,
+         output_type=torch.tensor,
+         field_names=PREDICTION_INPUT_NAMES,
+     )
+
+ def plot(ts_index, test_dataset, forecasts, prediction_length):
+     fig, ax = plt.subplots(figsize=(12, 8), facecolor='white')
+
+     # Length of the target data
+     target_length = len(test_dataset[ts_index]['target'])
+
+     # Creating a period range for the entire dataset plus forecast period
+     index = pd.period_range(
+         start=test_dataset[ts_index]['start'],
+         periods=target_length + prediction_length,
+         freq='1D'
+     ).to_timestamp()
+
+     # Plotting actual data
+     ax.plot(
+         index[:target_length],
+         test_dataset[ts_index]['target'],
+         label="Actual"
+     )
+
+     # Plotting the forecast data
+     # Forecast starts right after the last actual data point
+     forecast_start_index = target_length
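+     # NOTE: forecasts stacks the sampled trajectories, roughly (series, samples, prediction_length);
+     # [ts_index][0] below plots only the first sample rather than, e.g., the median across samples.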
+     ax.plot(
+         index[forecast_start_index:],
+         forecasts[ts_index][0][:prediction_length],  # Use forecasts[ts_index][0][:prediction_length] to slice the forecast values
+         label="Prediction"
+     )
+
+     ax.set_ylim(0, 140000)
+     ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=(1, 7)))
+     ax.xaxis.set_minor_locator(mdates.MonthLocator())
+
+     plt.legend()
+     return fig
+
+
+
+ zero = torch.Tensor([0]).cuda()
+ print(zero.device) # <-- 'cpu' 🤔
+
+ @spaces.GPU
+ def do_prediction(days_to_predict: int):
+     device = zero.device
+
+     # Define the desired prediction length
+     prediction_length = 7  # Number of time steps to predict into the future
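+     # NOTE: the "Days to Predict" slider value (days_to_predict) is not applied here;
+     # the horizon stays fixed at 7, presumably to match the pretrained model's prediction_length.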
+     freq = "1D"  # Daily frequency
+
+     dataset = load_dataset("thesven/BTC-Daily-Avg-Market-Value")
+
+     dataset['test'].set_transform(partial(transform_start_field, freq=freq))
+
+     model = AutoformerForPrediction.from_pretrained("thesven/BTC-Autoformer-v1")
+     config = model.config
+     print(f"Config: {config}")
+
+     test_dataloader = create_test_dataloader(
+         config=config,
+         freq=freq,
+         data=dataset['test'],
+         batch_size=64,
+         prediction_length=prediction_length,
+     )
+
+     model.to(device)
+     model.eval()
+
+     forecasts = []
+
+     for batch in test_dataloader:
+         outputs = model.generate(
+             static_categorical_features=batch["static_categorical_features"].to(device)
+             if config.num_static_categorical_features > 0
+             else None,
+             static_real_features=batch["static_real_features"].to(device)
+             if config.num_static_real_features > 0
+             else None,
+             past_time_features=batch["past_time_features"].to(device),
+             past_values=batch["past_values"].to(device),
+             future_time_features=batch["future_time_features"].to(device),
+             past_observed_mask=batch["past_observed_mask"].to(device),
+         )
+         forecasts.append(outputs.sequences.cpu().numpy())
+
+     forecasts = np.vstack(forecasts)
+
+     print(forecasts.shape)
+
+     return plot(0, dataset['test'], forecasts, prediction_length)
+
+
+ interface = gr.Interface(
+     fn=do_prediction,
+     inputs=gr.Slider(minimum=1, maximum=30, step=1, label="Days to Predict"),
+     outputs="plot",
+     title="Prediction Plot",
+     description="Adjust the slider to set the number of days to predict.",
+     allow_flagging=False,  # Disable flagging for simplicity
+ )
+ interface.launch()