Spaces:
Sleeping
Sleeping
import os | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
import skops.io as sio | |
from io import BytesIO | |
class StockPredictor:
    """
    Load per-ticker prediction models, prepare historical stock data, and
    forecast closing prices.

    Attributes
    ----------
    models : dict
        Loaded models keyed by ticker symbol.
    data_dir : str
        Directory containing the historical stock data CSV files.

    Methods
    -------
    load_models(model_dir):
        Loads the models from the specified directory.
    load_stock_data(ticker):
        Loads and processes historical stock data from a CSV file.
    forecast(ticker, days):
        Forecasts stock prices for the specified ticker and number of days.
    """

    # Feature columns, in the order they are handed to ``model.predict``.
    FEATURES = ["year", "month", "day", "ma_5", "ma_10", "lag_5", "lag_10"]

    # Number of trailing rows (history plus forecast) returned and plotted.
    PLOT_WINDOW = 60

    def __init__(self, model_dir="model/SKLearn_Models", data_dir="data"):
        """
        Load every model found in ``model_dir`` and remember ``data_dir``.

        Parameters
        ----------
        model_dir : str
            Directory containing the trained models. It is only read here;
            it is not stored on the instance.
        data_dir : str
            Directory containing the historical stock data CSV files.
        """
        self.models = self.load_models(model_dir)
        self.data_dir = data_dir

    def load_models(self, model_dir):
        """
        Load every ``*.skops`` file in ``model_dir``.

        The ticker is the part of the file name (extension removed) before
        the first underscore, so ``AAPL_model.skops`` and ``AAPL.skops`` both
        map to ``"AAPL"``.

        Parameters
        ----------
        model_dir : str
            Directory containing the trained models.

        Returns
        -------
        dict
            Dictionary mapping ticker symbol to the loaded model.
        """
        suffix = ".skops"
        models = {}
        # Sorted so the ticker order does not depend on the order in which
        # the file system happens to list the directory.
        for file_name in sorted(os.listdir(model_dir)):
            if not file_name.endswith(suffix):
                continue
            ticker = file_name[: -len(suffix)].split("_")[0]
            # NOTE(review): depending on the installed skops version, loading
            # may require an explicit ``trusted=`` argument - confirm against
            # the pinned release.
            models[ticker] = sio.load(os.path.join(model_dir, file_name))
        return models

    def load_stock_data(self, ticker):
        """
        Load ``<data_dir>/<ticker>.csv`` and derive the model features.

        The CSV must provide ``date`` and ``close`` columns. Rows before
        2000-01-01 are discarded, the rest are sorted by date, and calendar,
        moving-average and lag features are added.

        Parameters
        ----------
        ticker : str
            Stock ticker symbol.

        Returns
        -------
        pandas.DataFrame
            Processed historical stock data with every row containing a NaN
            removed (this includes the warm-up rows of the rolling and lag
            features).
        """
        csv_path = os.path.join(self.data_dir, f"{ticker}.csv")
        raw = pd.read_csv(csv_path)
        raw["date"] = pd.to_datetime(raw["date"])

        # Work on an independent copy: the column assignments below must not
        # write into a filtered slice of the raw frame.
        data = raw.loc[raw["date"] >= "2000-01-01"].copy()
        data = data.sort_values("date")

        # Calendar features.
        data["year"] = data["date"].dt.year
        data["month"] = data["date"].dt.month
        data["day"] = data["date"].dt.day

        # Moving averages and lagged closing prices.
        data["ma_5"] = data["close"].rolling(window=5).mean()
        data["ma_10"] = data["close"].rolling(window=10).mean()
        data["lag_5"] = data["close"].shift(5)
        data["lag_10"] = data["close"].shift(10)

        return data.dropna()

    def forecast(self, ticker, days):
        """
        Forecast stock prices for the specified ticker and number of days.

        Parameters
        ----------
        ticker : str
            Stock ticker symbol.
        days : int
            Number of days for forecasting. Integral floats (as some UI
            widgets deliver them) are accepted and converted.

        Returns
        -------
        tuple
            ``(frame, image)``. ``frame`` holds the last ``PLOT_WINDOW`` rows
            of dates, actual close values and predicted close values;
            ``image`` is the rendered plot as a numpy array. When no model is
            loaded for ``ticker`` or no usable history exists, ``frame`` is a
            single-row frame with an ``Error`` column and ``image`` is None.
        """
        model = self.models.get(ticker)
        # Explicit None check: a model object that defines __len__ or
        # __bool__ could otherwise be mistaken for "missing".
        if model is None:
            return pd.DataFrame({"Error": ["Model not found"]}), None

        # pd.date_range needs an integer number of periods.
        days = int(days)

        data = self.load_stock_data(ticker)
        if data.empty:
            # Nothing to seed the forecast with; report it instead of
            # failing on iloc[-1] further down.
            return pd.DataFrame({"Error": ["No historical data available"]}), None

        # In-sample predictions for the historical rows.
        data["predicted_close"] = model.predict(data[self.FEATURES])

        prediction_df = self._forecast_future(model, data, days)

        # Concatenate actual and predicted data, keeping the trailing window.
        combined_df = pd.concat(
            [data[["date", "close", "predicted_close"]], prediction_df],
            ignore_index=True,
        )
        plot_data = combined_df.tail(self.PLOT_WINDOW)

        return plot_data, self._render_plot(plot_data, ticker, days)

    def _forecast_future(self, model, data, days):
        """Predict ``days`` consecutive dates after the last row of ``data``."""
        features = self.FEATURES
        # date_range defaults to daily frequency, so every calendar day
        # after the last known date is forecast.
        future_dates = pd.date_range(
            start=data["date"].max() + pd.Timedelta(days=1), periods=days
        )

        # Start from the most recent feature row and roll it forward.
        row = data[features].iloc[-1].copy()
        # The last (up to) ten closes; its five-element tail doubles as the
        # short window.
        window = data["close"].iloc[-10:].tolist()

        predictions = []
        for date in future_dates:
            row["year"] = date.year
            row["month"] = date.month
            row["day"] = date.day

            # Lags are only refreshed once enough history is available.
            if len(window) >= 5:
                row["lag_5"] = window[-5]
            if len(window) >= 10:
                row["lag_10"] = window[-10]

            prediction = model.predict(pd.DataFrame([row], columns=features))[0]
            predictions.append(prediction)

            # Feed the prediction back in and keep at most ten values.
            window.append(prediction)
            del window[:-10]
            row["ma_5"] = np.mean(window[-5:])
            row["ma_10"] = np.mean(window)

        return pd.DataFrame({"date": future_dates, "predicted_close": predictions})

    def _render_plot(self, plot_data, ticker, days):
        """Draw actual vs. predicted closes and return the PNG as an array."""
        # Rows with a known close are history; forecast rows have NaN there.
        actual_count = int(plot_data["close"].notna().sum())

        # Use an explicit figure rather than pyplot's implicit "current
        # figure", and always close it, even if rendering fails.
        fig, ax = plt.subplots(figsize=(14, 7))
        try:
            ax.plot(plot_data["date"], plot_data["close"], label="Actual")
            ax.plot(
                plot_data["date"], plot_data["predicted_close"], label="Predicted"
            )
            ax.set_xlabel("Date")
            ax.set_ylabel("Stock Price")
            ax.set_title(
                f"Last {actual_count} Days Actual and Next {days} Days "
                f"Prediction for {ticker}"
            )
            ax.legend()
            ax.grid(True)
            ax.tick_params(axis="x", labelrotation=45)

            # Round-trip through an in-memory PNG to obtain a numpy image.
            buf = BytesIO()
            fig.savefig(buf, format="png")
            buf.seek(0)
            return np.array(plt.imread(buf))
        finally:
            plt.close(fig)
def create_gradio_interface(stock_predictor):
    """
    Build the Gradio front end for a stock predictor.

    The interface offers a ticker dropdown (one choice per loaded model) and
    a 1-30 day slider as inputs, and wires them to
    ``stock_predictor.forecast``; its two return values are shown as a table
    and an image.

    Parameters
    ----------
    stock_predictor : StockPredictor
        Instance of the StockPredictor class.

    Returns
    -------
    gradio.Interface
        The Gradio interface.
    """
    # Inputs: which ticker to forecast and how far ahead.
    ticker_input = gr.Dropdown(
        label="Select Ticker",
        choices=list(stock_predictor.models),
    )
    horizon_input = gr.Slider(
        label="Number of Days for Forecasting",
        minimum=1,
        maximum=30,
        step=1,
    )

    # Outputs: the forecast table and the rendered plot.
    result_table = gr.DataFrame(headers=["date", "close", "predicted_close"])
    result_plot = gr.Image(type="numpy")

    return gr.Interface(
        fn=stock_predictor.forecast,
        inputs=[ticker_input, horizon_input],
        outputs=[result_table, result_plot],
        title="Stock Price Forecasting",
        description="Select a ticker and number of days to forecast stock prices.",
    )
if __name__ == "__main__":
    # Script entry point: load the models, build the UI, and start serving.
    stock_predictor = StockPredictor(
        data_dir="data/Cleaned_Kaggle_NASDAQ_Daily_Data",
        model_dir="model/SKLearn_Models",
    )

    # The interface is built from the predictor before being launched.
    iface = create_gradio_interface(stock_predictor)
    iface.launch()