marci / Source /Train /xgboost_ML.py
BraydenMoore's picture
Fix build methods
7727a49
raw
history blame
2.34 kB
import xgboost as xgb
import pandas as pd
import pickle as pkl
import numpy as np
from tqdm import tqdm
from IPython.display import clear_output
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import os
# Resolve the project folders relative to this script's location: the script's
# own folder, its parent, and three sibling folders under that parent.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory, model_directory, pickle_directory = (
    os.path.join(parent_directory, folder)
    for folder in ('Data', 'Models', 'Pickles')
)

# Load the game-by-game table and discard every row with a missing value.
# NOTE: dropna() runs before the column drop below, so a NaN in a column that
# is about to be discarded still removes the whole row.
file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
data = pd.read_csv(file_path).dropna()

# Classification label, taken from the 'Home-Team-Win' column.
margin = data['Home-Team-Win']

# Columns removed from the feature matrix. 'game_id' is not in this list and
# therefore stays in `data`.
dropped_columns = [
    'Home-Team-Win',
    'Over',
    'Season',
    'home_team',
    'away_team',
    'game_date',
    'Key',
    'Home Score',
    'Away Score',
    'Home Odds Close',
    'Away Odds Close',
    'Home Winnings',
    'Away Winnings',
    'Home Odds',
    'Away Odds',
]
data = data.drop(columns=dropped_columns)
# Train 100 models on independent random 90/10 splits and persist the split
# and model whenever a run matches or beats the best test accuracy seen so far.
#
# NOTE(review): the saved model is chosen by its accuracy on the very test
# split it is scored on, so the accuracy embedded in the file name is an
# optimistic estimate -- confirm this is the intended selection procedure.

# Hyperparameters are identical for every run, so build them once.
param = {
    'max_depth': 2,
    'eta': 0.01,
    'objective': 'multi:softprob',
    'num_class': 2,
}
epochs = 500  # passed to xgb.train as the number of boosting rounds

# Create the output folders up front so a missing directory cannot fail the
# script only after a full training run has completed.
os.makedirs(pickle_directory, exist_ok=True)
os.makedirs(model_directory, exist_ok=True)

acc_results = []
for _ in tqdm(range(100)):
    X_train, X_test, y_train, y_test = train_test_split(data, margin, test_size=.1)

    # Remember which games landed in each split, then remove the identifier so
    # it is not used as a feature. A non-inplace drop avoids mutating the
    # frames returned by train_test_split (pandas may warn about that).
    train_games = X_train['game_id']
    test_games = X_test['game_id']
    X_train = X_train.drop(columns=['game_id'])
    X_test = X_test.drop(columns=['game_id'])

    train = xgb.DMatrix(X_train.astype(float).values, label=y_train)
    test = xgb.DMatrix(X_test.astype(float).values, label=y_test)

    model = xgb.train(param, train, epochs)

    # One row of class probabilities per test game; the predicted class is the
    # column holding the highest probability.
    predictions = model.predict(test)
    y = np.argmax(predictions, axis=1)

    acc = round(accuracy_score(y_test, y)*100, 1)
    acc_results.append(acc)
    clear_output(wait=True)
    print(f"Best accuracy: {max(acc_results)}%")

    # Save when this run is the best so far. A tie with the current best also
    # re-saves, overwriting the split pickles with the latest tying run.
    if acc == max(acc_results):
        file_path = os.path.join(pickle_directory, 'train_games_ML_no_odds.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(train_games, f)

        file_path = os.path.join(pickle_directory, 'test_games_ML_no_odds.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(test_games, f)

        # The accuracy is part of the model file name, so each new best
        # accuracy value produces a separate model file.
        file_path = os.path.join(model_directory, f'xgboost_ML_no_odds_{acc}%.json')
        model.save_model(file_path)

print('Done')