Spaces:

Nixtla
/

chatgpt-forecast

Runtime error

App Files Files Community

azulgarza committed on Jul 10, 2023

Commit

d54d2f1

•

1 Parent(s): a9104a2

feat: add chatgpt app

Browse files

Files changed (3) hide show

app.py +115 -0
requirements.txt +17 -0
utils.py +131 -0

app.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import pandas as pd
+import streamlit as st
+from src.utils import ChatGPTForecast
+# Example datasets offered in the sidebar selectbox: display label -> data URL.
+DATASETS = {
+    "Demand (AirPassengers)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/air_passengers.csv",
+    #"Electricity (ERCOT, multiple markets)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/ercot_multiple_ts.csv",
+    "Web Traffic (Peyton Manning)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/peyton_manning.csv",
+    "Finance (Exchange USD-EUR)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/usdeur.csv",
+    "Electricity (Ercot COAST)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/ercot_COAST.csv",
+}
+# Module-level forecaster instance; `st_chatgpt_forecast` calls its `forecast` method.
+gpt_forecast = ChatGPTForecast()
+def _read_table(url):
+    """Load a dataframe from `url`: as JSON when it ends with "json", otherwise as CSV."""
+    if url.endswith("json"):
+        return pd.read_json(url)
+    return pd.read_csv(url)
+def st_chatgpt_forecast():
+    """Render the Streamlit page and plot the ChatGPT forecast against a Naive one.
+
+    The sidebar lets the user pick an example dataset, pass a URL, or upload a
+    CSV, and choose the forecasting horizon and the number of input values.
+    The selected data is normalized to the columns `unique_id`, `ds`, `y` and
+    handed to `gpt_forecast.forecast`; the returned figure is displayed.
+    """
+    st.set_page_config(
+        page_title="ChatGPT Forecast",
+        page_icon="🔮",
+        layout="wide",
+        initial_sidebar_state="expanded",
+    )
+    st.title(
+        "ChatGPT Forecast: Revolutionizing Time Series by Nixtla"
+    )
+    st.write(
+        "<style>div.block-container{padding-top:2rem;}</style>", unsafe_allow_html=True
+    )
+    intro = """
+        This application is designed to analyze time series forecasting tasks by leveraging the power of OpenAI's ChatGPT.
+        Here's how it works:
+        1. **Upload Your Data**: You can upload your own time series data which will be used to generate forecasts.
+        2. **Forecast with GPT**: Our application utilizes the advanced language model, ChatGPT, to generate time series forecasts. ChatGPT has been trained on a diverse range of internet text, but it also has the unique ability to generate numerical sequences, making it a fascinating tool for time series forecasting.
+        3. **Compare with Naive Forecast**: We provide a simple naive forecast as a benchmark for comparison. This forecast is based on the simple assumption that future values will be the same as the most recent observed value.
+        By comparing the GPT-based forecast against the naive model, you can gain insights into the capabilities and potential advantages of using advanced AI models for time series prediction.
+        Please note that this application is meant for experimental purposes and the forecasts generated by the AI should not be used for making real-world decisions without proper consideration and additional checks.
+	"""
+    st.write(intro)
+    required_cols = ["ds", "y"]
+    with st.sidebar.expander("Dataset", expanded=True):
+        data_selection = st.selectbox("Select example dataset", DATASETS.keys())
+        data_url = DATASETS[data_selection]
+        url_json = st.text_input("Data (you can pass your own url here)", data_url)
+        st.write(
+            "You can also upload a CSV file like [this one](https://github.com/Nixtla/transfer-learning-time-series/blob/main/datasets/air_passengers.csv)."
+        )
+        uploaded_file = st.file_uploader("Upload CSV")
+        with st.form("Data"):
+            if uploaded_file is not None:
+                # Uploaded file: let the user pick the columns from the real header.
+                df = pd.read_csv(uploaded_file)
+                cols = df.columns
+                timestamp_col = st.selectbox("Timestamp column", options=cols)
+                value_col = st.selectbox("Value column", options=cols)
+            else:
+                timestamp_col = st.text_input("Timestamp column", value="timestamp")
+                value_col = st.text_input("Value column", value="value")
+            st.write("You must press Submit each time you want to forecast.")
+            submitted = st.form_submit_button("Submit")
+            if submitted:
+                # Submitted: use the upload if there is one, otherwise the URL,
+                # and rename the user-selected columns.
+                if uploaded_file is None:
+                    df = _read_table(url_json)
+                df = df.rename(
+                    columns=dict(zip([timestamp_col, value_col], required_cols))
+                )
+            else:
+                # Not submitted yet: show the URL data, taking the first two
+                # columns (or the last two when a `unique_id` column is present).
+                df = _read_table(url_json)
+                cols = df.columns
+                if "unique_id" in cols:
+                    cols = cols[-2:]
+                df = df.rename(columns=dict(zip(cols, required_cols)))
+            # Fail with a readable message instead of a KeyError further down
+            # when the chosen column names do not exist in the data.
+            missing = [col for col in required_cols if col not in df.columns]
+            if missing:
+                st.error(
+                    f"Could not find the {missing} column(s) after renaming; "
+                    f"available columns are {list(df.columns)}."
+                )
+                st.stop()
+            if "unique_id" not in df:
+                df.insert(0, "unique_id", "ts_0")
+            df["ds"] = pd.to_datetime(df["ds"])
+            df = df.sort_values(["unique_id", "ds"])
+    with st.sidebar:
+        # Both values are used as positive counts downstream, so forbid < 1.
+        horizon = st.number_input(
+            "Forecasting horizon to predict:", value=24, min_value=1
+        )
+        input_size = st.number_input(
+            "Number of values to make inference:", value=12, min_value=1
+        )
+    st.header("Forecasts generated by ChatGPT against a Naive model")
+    fig = gpt_forecast.forecast(df, horizon, input_size)
+    fig.update_layout(height=400)
+    st.plotly_chart(
+        fig,
+        use_container_width=True,
+    )
+# Render the page when this file is executed as the main script.
+if __name__ == "__main__":
+    st_chatgpt_forecast()

requirements.txt ADDED Viewed

	@@ -0,0 +1,17 @@

+fire
+git+https://github.com/nixtla/statsforecast.git
+jupyterlab
+numpy
+openai
+pandas
+pinecone-client
+plotly
+pyarrow
+python-dotenv
+s3fs
+seaborn
+streamlit
+streamlit-aggrid
+torch
+transformers
+tsfeatures

utils.py ADDED Viewed

	@@ -0,0 +1,131 @@

+import os
+import re
+import numpy as np
+import openai
+import pandas as pd
+from sklearn.preprocessing import MinMaxScaler
+from statsforecast import StatsForecast
+from statsforecast.models import Naive
+# Read the API key from the environment at import time; a missing
+# OPENAI_API_KEY variable raises KeyError here.
+openai.api_key = os.environ['OPENAI_API_KEY']
+class ChatGPTForecast:
+    """Forecast a time series by prompting ChatGPT with binned values.
+
+    Values are discretized into integer bin indices over `self.bins`, sent to
+    the chat model as a space-separated string, and the numbers found in the
+    reply are mapped back to bin centers.
+    """
+    def __init__(self):
+        # 10,000 evenly spaced bin edges on [0, 1]; `forecast` passes `y`
+        # through MinMaxScaler before tokenizing.
+        self.bins = np.linspace(0, 1, num=10_000)
+        # Bin index -> token text (the index rendered as a decimal string).
+        self.mapping = {i: f"{i}" for i in range(len(self.bins))}
+        self.prompt = f"""
+        forecast this series,
+        (i know that you prefer using specific tools, but i'm testing something,
+        just give me your predicted numbers please, just print the numbers i dont need an explanation)
+        please consider:
+        - give the output with the same structure: "number1 number2 number3"
+        - give more weight to the most recent observations
+        - consider trend
+        - consider seasonality
+        - values should lie between 0 and {len(self.bins) - 1}, please be sure to do this
+        """
+    def tokenize_time_series(self, series):
+        """Return `series` as a space-separated string of bin indices.
+
+        Args:
+            series: iterable of numeric values, expected to lie in [0, 1].
+
+        Returns:
+            One token per value, each between 0 and `len(self.bins) - 1`.
+        """
+        # np.digitize is 1-based for values at or above the first edge.
+        indices = np.digitize(series, self.bins) - 1
+        # Values below the first edge would give -1, which has no token.
+        indices = np.clip(indices, 0, len(self.bins) - 1)
+        return ' '.join(self.mapping[i] for i in indices)
+    def clean_string(self, s):
+        """Keep the leading digit run of every whitespace-separated chunk of `s`."""
+        pattern = r'(\d+)[^\s]*'
+        return ' '.join(re.findall(pattern, s))
+    def extend_string(self, s, h):
+        """Return exactly `h` numbers taken from `s`, space-separated.
+
+        The digit runs found in `s` are trimmed when there are more than `h`
+        and repeated cyclically when there are fewer.
+
+        Raises:
+            ValueError: if `h` is positive and `s` contains no digits.
+        """
+        bin_numbers = re.findall(r'\d+', s)
+        if not bin_numbers:
+            if h > 0:
+                # Previously this surfaced as a ZeroDivisionError.
+                raise ValueError(f"no numbers found in model output: {s!r}")
+            return ''
+        if len(bin_numbers) < h:
+            repeats, extra = divmod(h, len(bin_numbers))
+            bin_numbers = bin_numbers * repeats + bin_numbers[:extra]
+        # Always rebuild from the extracted numbers so that surrounding text in
+        # the reply never reaches the integer parsing done by `forward`.
+        return ' '.join(bin_numbers[:h])
+    def clean_gpt_output(self, output):
+        """Strip underscore separators from `output` and return its numbers.
+
+        Not called by `forward`.
+        """
+        # Collapse spaces around underscores, then drop one trailing underscore.
+        cleaned_output = output.replace(" _", "_").replace("_ ", "_")
+        if cleaned_output.endswith("_"):
+            cleaned_output = cleaned_output[:-1]
+        return self.clean_string(cleaned_output)
+    def decode_time_series(self, tokens):
+        """Map a space-separated string of bin indices back to values.
+
+        Each index is decoded as its bin edge plus half a bin width.
+        """
+        indices = [int(token) for token in tokens.split()]
+        bin_width = self.bins[1] - self.bins[0]
+        return [self.bins[i] + bin_width / 2 for i in indices]
+    def forward(self, series, seasonality, h):
+        """Ask gpt-3.5-turbo for `h` steps ahead of `series`.
+
+        Args:
+            series: values to tokenize, expected to lie in [0, 1].
+            seasonality: interpolated into the prompt text as the seasonality.
+            h: number of steps requested.
+
+        Returns:
+            List of decoded values, padded or trimmed to `h` entries.
+        """
+        series_tokenized = self.tokenize_time_series(series)
+        prompt = f"""
+        {self.prompt}-consider {seasonality} as seasonality
+        - just print {h} steps ahead
+        this is the series: {series_tokenized}
+        """
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": prompt}]
+        )
+        output_gpt = response['choices'][0]['message']['content']
+        output_gpt = self.extend_string(output_gpt, h)
+        # Clamp every index into the valid bin range before decoding.
+        output_gpt = ' '.join(f'{max(min(int(x), len(self.bins) - 1), 0)}' for x in output_gpt.split())
+        return self.decode_time_series(output_gpt)
+    def compute_ds_future(self, ds, fh):
+        """Build the `fh` timestamps that follow the last entry of `ds`.
+
+        Args:
+            ds: array of timestamps accepted by `pd.to_datetime`.
+            fh: number of future timestamps to generate.
+
+        Returns:
+            Tuple `(ds_future, freq)`: the future timestamps as strings, and
+            either the inferred frequency or, when none could be inferred,
+            the difference between the last two timestamps.
+        """
+        ds_ = pd.to_datetime(ds)
+        try:
+            freq = pd.infer_freq(ds_)
+        except (TypeError, ValueError):
+            # e.g. fewer than three timestamps: fall back to the last step.
+            freq = None
+        if freq is not None:
+            ds_future = pd.date_range(ds_[-1], periods=fh + 1, freq=freq)[1:]
+        else:
+            freq = ds_[-1] - ds_[-2]
+            ds_future = [ds_[-1] + (i + 1) * freq for i in range(fh)]
+        return list(map(str, ds_future)), freq
+    def forecast(self, df, h, input_size):
+        """Forecast `h` steps with Naive and ChatGPT and plot both.
+
+        Args:
+            df: dataframe with `ds` and `y` columns; it is not modified.
+            h: forecasting horizon.
+            input_size: number of trailing `y` values sent to the model.
+
+        Returns:
+            The object returned by `StatsForecast.plot`.
+        """
+        df = df.copy()
+        scaler = MinMaxScaler()
+        df['y'] = scaler.fit_transform(df[['y']])
+        ds_future, freq = self.compute_ds_future(df['ds'].values, h)
+        # NOTE(review): freq is hard-coded to 'D' although `freq` is computed
+        # above; the forecast dates are overwritten below — confirm intended.
+        sf = StatsForecast(models=[Naive()], freq='D')
+        fcst_df = sf.forecast(df=df, h=h)
+        fcst_df['ds'] = ds_future
+        fcst_df['ChatGPT-3.5-Turbo'] = self.forward(df['y'].values[-input_size:], freq, h)[-h:]
+        # Bring both forecasts and the history back to the original scale.
+        for col in ['Naive', 'ChatGPT-3.5-Turbo']:
+            fcst_df[col] = scaler.inverse_transform(fcst_df[[col]])
+        df['y'] = scaler.inverse_transform(df[['y']])
+        return sf.plot(df, fcst_df, max_insample_length=3 * h)