Spaces:
Running
Running
BraydenMoore
committed on
Commit
•
3231b63
1
Parent(s):
1beb833
Initial commit
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- Dockerfile.txt +29 -0
- Notebook.ipynb +0 -0
- Source/Build/__pycache__/build.cpython-311.pyc +0 -0
- Source/Build/build.py +206 -0
- Source/Build/nfl_data_py +1 -0
- Source/Build/update.py +25 -0
- Source/Data/gbg.csv +3 -0
- Source/Data/gbg_and_odds.csv +3 -0
- Source/Data/gbg_and_odds_this_year.csv +3 -0
- Source/Data/gbg_this_year.csv +3 -0
- Source/Data/pbp.csv +3 -0
- Source/Data/pbp_this_year.csv +3 -0
- Source/Models/__init__.py +0 -0
- Source/Models/xgboost_ML_75.4%.json +0 -0
- Source/Models/xgboost_OU_59.3%.json +0 -0
- Source/Pickles/team_abbreviation_to_name.pkl +3 -0
- Source/Pickles/team_name_to_abbreviation.pkl +3 -0
- Source/Pickles/test_games_ML.pkl +3 -0
- Source/Pickles/test_games_OU.pkl +3 -0
- Source/Pickles/train_games_ML.pkl +3 -0
- Source/Pickles/train_games_OU.pkl +3 -0
- Source/Predict/__pycache__/predict.cpython-311.pyc +0 -0
- Source/Predict/predict.py +201 -0
- Source/Test/__init__.py +0 -0
- Source/Test/xgboost_ML.py +59 -0
- Source/Test/xgboost_ML_75.4%.png +0 -0
- Source/Test/xgboost_ML_75.4%_dark.png +0 -0
- Source/Test/xgboost_OU.py +59 -0
- Source/Test/xgboost_OU_59.3%.png +0 -0
- Source/Test/xgboost_OU_59.3%_dark.png +0 -0
- Source/Train/xgboost_ML.py +69 -0
- Source/Train/xgboost_OU.py +70 -0
- Static/Arizona Cardinals.webp +0 -0
- Static/Atlanta Falcons.webp +0 -0
- Static/Baltimore Ravens.webp +0 -0
- Static/Buffalo Bills.webp +0 -0
- Static/Carolina Panthers.webp +0 -0
- Static/Chicago Bears.webp +0 -0
- Static/Cincinnati Bengals.webp +0 -0
- Static/Cleveland Browns.webp +0 -0
- Static/Dallas Cowboys.webp +0 -0
- Static/Denver Broncos.webp +0 -0
- Static/Detroit Lions.webp +0 -0
- Static/Green Bay Packers.webp +0 -0
- Static/Houston Texans.webp +0 -0
- Static/Indianapolis Colts.webp +0 -0
- Static/Jacksonville Jaguars.webp +0 -0
- Static/Kansas City Chiefs.webp +0 -0
- Static/Las Vegas Raiders.webp +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.csv filter=lfs diff=lfs merge=lfs -text
|
Dockerfile.txt
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Use the official lightweight Python image.
FROM python:3.11

# Allow statements and log messages to immediately appear in the logs
ENV PYTHONUNBUFFERED True

# Create the non-root user up front so app files can be copied with the right owner.
RUN useradd -m -u 1000 user

# Copy local code to the container image, owned by the non-root user.
# FIX: previously the code was copied twice -- once as root into /app (the copy
# gunicorn actually serves) and once with --chown into $HOME/app (unused).
# A single owned copy into the work directory replaces both.
ENV APP_HOME /app
WORKDIR $APP_HOME
COPY --chown=user . ./

# Install production dependencies.
RUN pip install --no-cache-dir -r requirements.txt

# Switch to the non-root user.
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Run the web service on container startup.
CMD exec gunicorn --bind 0.0.0.0:7860 --workers 9 --threads 16 --timeout 120 main:app
|
29 |
+
|
Notebook.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Source/Build/__pycache__/build.cpython-311.pyc
ADDED
Binary file (20.8 kB). View file
|
|
Source/Build/build.py
ADDED
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import nfl_data_py.nfl_data_py as nfl
|
2 |
+
from tqdm import tqdm
|
3 |
+
import numpy as np
|
4 |
+
import pandas as pd
|
5 |
+
pd.set_option('chained_assignment',None)
|
6 |
+
pd.set_option('display.max_columns',None)
|
7 |
+
import os
|
8 |
+
import datetime as dt
|
9 |
+
|
10 |
+
current_directory = os.path.dirname(os.path.abspath(__file__))
|
11 |
+
parent_directory = os.path.dirname(current_directory)
|
12 |
+
data_directory = os.path.join(parent_directory, 'Data')
|
13 |
+
|
14 |
+
def get_pbp_data(get_seasons=None, overwrite_seasons=None):
    """
    Pull play-by-play data from nflFastR's Github repo.

    Parameters
    ----------
    get_seasons : list[int] | None
        Seasons to download, e.g. [2022, 2023].
    overwrite_seasons : list[int] | None
        Seasons whose rows in Data/pbp.csv should be replaced with the freshly
        downloaded data. Falsy -> the on-disk historical file is left alone.

    Returns
    -------
    pandas.DataFrame
        The downloaded (and, when overwriting, merged) play-by-play data.

    Side effects: may rewrite Data/pbp.csv; always writes Data/pbp_this_year.csv.
    """
    # FIX: avoid mutable default arguments; normalize None to fresh lists.
    get_seasons = [] if get_seasons is None else get_seasons
    overwrite_seasons = [] if overwrite_seasons is None else overwrite_seasons

    pbp = nfl.import_pbp_data(get_seasons)
    # 'drive_time_of_possession' is a 'MM:SS' string; convert to total seconds
    # (0 when missing).
    pbp['TOP_seconds'] = pbp['drive_time_of_possession'].apply(
        lambda x: int(x.split(':')[0]) * 60 + int(x.split(':')[1]) if pd.notnull(x) else 0)

    if overwrite_seasons:
        # Keep every historical season except the ones being refreshed, then
        # append the fresh download and persist.
        file_path = os.path.join(data_directory, 'pbp.csv')
        old = pd.read_csv(file_path, index_col=0, low_memory=False)
        old = old.loc[~old['season'].isin(overwrite_seasons)]
        pbp = pd.concat([old, pbp])
        pbp.to_csv(file_path)

    # NFL season label: Aug-Dec games belong to the current calendar year,
    # Jan-Jul games to the previous year's season.
    now = dt.datetime.now()
    season = now.year if now.month in [8, 9, 10, 11, 12] else now.year - 1
    pbp_this_year = pbp.loc[pbp['season'] == season]
    file_path = os.path.join(data_directory, 'pbp_this_year.csv')
    pbp_this_year.to_csv(file_path)

    return pbp
+
def build_gbg_data(get_seasons=[], overwrite_seasons=[]):
    """
    Using pbp.csv, build a game-by-game dataset to use for prediction models.

    Populate overwrite_seasons with the current year to re-download and update
    only this season's data while preserving historical data; leave it empty to
    build purely from the on-disk pbp.csv.

    Side effects: writes Data/gbg.csv and Data/gbg_this_year.csv.
    Returns the full game-by-game DataFrame.
    """
    print('Loading play-by-play data.')

    if overwrite_seasons:
        # Refresh the requested seasons from nflFastR (also rewrites pbp.csv).
        print('Overwriting data for', overwrite_seasons)
        pbp = get_pbp_data(get_seasons, overwrite_seasons)

    if not overwrite_seasons:
        # No refresh requested: work entirely from the cached file.
        file_path = os.path.join(data_directory, 'pbp.csv')
        pbp = pd.read_csv(file_path, index_col=0)

    pbp = pbp.loc[pbp['season'].isin(get_seasons)]
    # game_id -> game_date, reattached to the merged output at the end.
    game_date_dict = dict(pbp[['game_id','game_date']].values)
    teams = list(set(list(pbp['home_team'].unique()) + list(pbp['away_team'].unique())))
    print(teams)
    seasons = pbp['season'].unique()

    print('Building game-by-game data.')
    data = pd.DataFrame()
    for season in seasons:
        print(season)
        for team_name in tqdm(teams):
            # create features: per-play indicator/value columns from this
            # team's perspective (offense = posteam, defense = defteam)
            team = pbp.loc[((pbp['home_team']==team_name) | (pbp['away_team']==team_name)) & (pbp['season']==season)]
            team['GP'] = team['week']
            team['W'] = [1 if r>0 and team_name==h else 1 if r<0 and team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
            team['L'] = [0 if r>0 and team_name==h else 0 if r<0 and team_name==a else 1 for r,a,h in team[['result','away_team','home_team']].values]
            team['W_PCT'] = team['W']/team['GP']
            team['TOP'] = [t if team_name==p else 0 for t,p in team[['TOP_seconds','posteam']].values]
            team['FGA'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','field_goal_attempt']].values]
            team['FGM'] = [1 if team_name==p and f=='made' else 0 for p,f in team[['posteam','field_goal_result']].values]
            team['FG_PCT'] = team['FGM']/team['FGA']
            team['PassTD'] = np.where((team['posteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
            team['RushTD'] = np.where((team['posteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
            team['PassTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
            team['RushTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
            team['PassYds'] = [y if p==team_name else 0 for p,y in team[['posteam','passing_yards']].values]
            team['RushYds'] = [y if p==team_name else 0 for p,y in team[['posteam','rushing_yards']].values]
            team['PassYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','passing_yards']].values]
            team['RushYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','rushing_yards']].values]
            team['Fum'] = np.where((team['defteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
            team['Fum_Allowed'] = np.where((team['posteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
            team['INT'] = np.where((team['defteam'] == team_name) & (team['interception'] == 1), 1, 0)
            team['INT_Allowed'] = np.where((team['posteam'] == team_name) & (team['interception'] == 1), 1, 0)
            team['Sacks'] = np.where((team['defteam'] == team_name) & (team['sack'] == 1), 1, 0)
            team['Sacks_Allowed'] = np.where((team['posteam'] == team_name) & (team['sack'] == 1), 1, 0)
            team['Penalties'] = np.where((team['penalty_team'] == team_name), 1, 0)
            team['FirstDowns'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','first_down']].values]
            team['3rdDownConverted'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_converted']].values]
            team['3rdDownFailed'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_failed']].values]
            team['3rdDownAllowed'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_converted']].values]
            team['3rdDownDefended'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_failed']].values]
            team['PTS'] = [ap if at==team_name else hp if ht==team_name else None for ht,at,hp,ap in team[['home_team','away_team','home_score','away_score']].values]
            team['PointDiff'] = [r if team_name==h else -r if team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]

            # aggregate from play-by-play to game-by-game
            features = {
                'GP':'mean',
                'W':'mean',
                'L':'mean',
                'W_PCT':'mean',
                'TOP':'sum',
                'FGA':'sum',
                'FGM':'sum',
                'FG_PCT':'mean',
                'PassTD':'sum',
                'RushTD':'sum',
                'PassTD_Allowed':'sum',
                'RushTD_Allowed':'sum',
                'PassYds':'sum',
                'RushYds':'sum',
                'PassYds_Allowed':'sum',
                'RushYds_Allowed':'sum',
                'Fum':'sum',
                'Fum_Allowed':'sum',
                'INT':'sum',
                'INT_Allowed':'sum',
                'Sacks':'sum',
                'Sacks_Allowed':'sum',
                'Penalties':'sum',
                'FirstDowns':'sum',
                '3rdDownConverted':'sum',
                '3rdDownFailed':'sum',
                '3rdDownAllowed':'sum',
                '3rdDownDefended':'sum',
                'PTS':'mean',
                'PointDiff':'mean'
            }

            game = team.groupby('game_id').agg(features).reset_index()
            # Cumulative record and running averages over the season so far.
            game[['W','L']] = game[['W','L']].expanding().sum()
            game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
            # Shift by one game so each row carries only stats from BEFORE that
            # game -- presumably to avoid target leakage when training.
            game[game.columns[1:]] = game[game.columns[1:]].shift()
            game['TEAM'] = team_name
            game['Season'] = season

            data = pd.concat([data,game])

    # separate home and away data and merge: each game becomes one row with
    # home-team features plus away-team features suffixed '.Away'.
    data = data.merge(pbp[['game_id','home_team','away_team']].drop_duplicates())
    home = data.loc[data['home_team']==data['TEAM']]
    away = data.loc[data['away_team']==data['TEAM']]
    away.columns = [f'{i}.Away' for i in away.columns]
    gbg = home.merge(away,left_on='game_id',right_on='game_id.Away')
    gbg.drop(columns=['TEAM','TEAM.Away','home_team.Away','away_team.Away','Season.Away','game_id.Away'], inplace=True)
    gbg['game_date'] = gbg['game_id'].map(game_date_dict)

    if overwrite_seasons:
        # Splice the rebuilt seasons into the historical gbg file.
        file_path = os.path.join(data_directory, 'gbg.csv')
        old = pd.read_csv(file_path, index_col=0, low_memory=False)
        old = old.loc[~old['Season'].isin(overwrite_seasons)]
        gbg = pd.concat([old,gbg])
    file_path = os.path.join(data_directory, 'gbg.csv')
    gbg.to_csv(file_path)

    # Also persist a current-season slice (same season rule as get_pbp_data).
    year = dt.datetime.now().year
    month = dt.datetime.now().month
    season = year if month in [8,9,10,11,12] else year-1
    gbg_this_year = gbg.loc[gbg['Season']==season]
    file_path = os.path.join(data_directory, 'gbg_this_year.csv')
    gbg_this_year.to_csv(file_path)

    return gbg
+
def add_odds_data(gbg, overwrite=False):
    """
    Get odds from Australian Sports Betting's free online dataset and merge it
    with game-by-game data.

    Parameters
    ----------
    gbg : pandas.DataFrame
        Output of build_gbg_data (must have game_date/home_team/away_team).
    overwrite : bool
        When True, also write the merged result to Data/gbg_and_odds.csv.

    Returns the merged DataFrame with betting lines and the two model targets
    ('Home-Team-Win' and 'Over').
    """

    # get team abbreviations (full name -> abbreviation, from nfl_data_py)
    team_descriptions = nfl.import_team_desc()
    team_abbreviation_dict = dict(team_descriptions[['team_name','team_abbr']].values)

    # get odds
    odds = pd.read_excel('https://www.aussportsbetting.com/historical_data/nfl.xlsx')
    # Normalize the franchise's historical names to its current one so the
    # abbreviation lookup works for older seasons.
    odds['Home Team'] = odds['Home Team'].str.replace('Washington Redskins','Washington Commanders').str.replace('Washington Football Team','Washington Commanders')
    odds['Away Team'] = odds['Away Team'].str.replace('Washington Redskins','Washington Commanders').str.replace('Washington Football Team','Washington Commanders')
    # Same Aug-Dec/Jan-Jul season rule used elsewhere in this module.
    odds['Season'] = [i.year if i.month in [8,9,10,11,12] else i.year-1 for i in odds['Date']]
    odds['Home Team Abbrev'] = odds['Home Team'].map(team_abbreviation_dict)
    odds['Away Team Abbrev'] = odds['Away Team'].map(team_abbreviation_dict)
    odds = odds[['Date','Home Score','Away Score','Home Team Abbrev','Away Team Abbrev','Home Odds Close','Away Odds Close','Total Score Close']]
    # Join key: date string + home abbrev + away abbrev (mirrored on gbg below).
    odds['Key'] = odds['Date'].astype(str) + odds['Home Team Abbrev'] + odds['Away Team Abbrev']
    odds = odds.drop(columns=['Date','Home Team Abbrev','Away Team Abbrev']).dropna()
    # Convert decimal odds to American odds.
    odds['Home Odds'] = [round((i-1)*100) if i>= 2 else round(-100/(i-1)) for i in odds['Home Odds Close']]
    odds['Away Odds'] = [round((i-1)*100) if i>= 2 else round(-100/(i-1)) for i in odds['Away Odds Close']]
    # Profit per unit staked on each side (decimal odds minus stake; -1 on a
    # loss; 0 on a tie).
    odds['Home Winnings'] = [ho-1 if h>a else -1 if a>h else 0 for ho,h,a in odds[['Home Odds Close','Home Score','Away Score']].values]
    odds['Away Winnings'] = [ao-1 if a>h else -1 if h>a else 0 for ao,h,a in odds[['Away Odds Close','Home Score','Away Score']].values]

    # merge with gbg
    gbg['Key'] = gbg['game_date'].astype(str) + gbg['home_team'] + gbg['away_team']
    gbg_and_odds = gbg.merge(odds, left_on='Key', right_on='Key')
    # Binary targets for the two models: moneyline and over/under.
    gbg_and_odds['Home-Team-Win'] = (gbg_and_odds['Home Score']>gbg_and_odds['Away Score']).astype(int)
    gbg_and_odds['Over'] = ((gbg_and_odds['Home Score'] + gbg_and_odds['Away Score'])>gbg_and_odds['Total Score Close']).astype(int)

    if overwrite:
        file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
        gbg_and_odds.to_csv(file_path)

    return gbg_and_odds
Source/Build/nfl_data_py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Subproject commit e4988dc303bc441108dd11f4ae93a8200aab10e1
|
Source/Build/update.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import nfl_data_py.nfl_data_py as nfl
import build
import datetime as dt
import numpy as np
import pandas as pd
pd.set_option('chained_assignment',None)
pd.set_option('display.max_columns',None)
import os

# Resolve paths relative to this file so the script works from any CWD.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')

# get current season (Aug-Dec -> this calendar year, Jan-Jul -> previous year)
year = dt.datetime.now().year
month = dt.datetime.now().month
season = year if month in [8,9,10,11,12] else year-1

# update current season
# BUG FIX: previously hard-coded [2023] even though `season` was computed above,
# which would silently stop updating after the 2023 season.
gbg = build.build_gbg_data(get_seasons=[season], overwrite_seasons=[season])
gbg_and_odds = build.add_odds_data(gbg)
gbg_and_odds_this_year = gbg_and_odds.loc[gbg_and_odds['Season']==season]

file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
gbg_and_odds_this_year.to_csv(file_path)
Source/Data/gbg.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:518ee58f264900f457b6ab0deed9a664607c16bf399fa2a669fc484244c57a92
|
3 |
+
size 1792121
|
Source/Data/gbg_and_odds.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8ec2d7b26b490e1c28de9f9c40b4b4991f6f1ff7bbad0f3e994a7c5c375affe
|
3 |
+
size 1567692
|
Source/Data/gbg_and_odds_this_year.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b848b812a85a74ad20af51565784382f9a9cd97af3b65d77801dd1d009054f91
|
3 |
+
size 886
|
Source/Data/gbg_this_year.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61d1340b0f2d8f5d4cad8efa0dfa2246adb0748ded9f3841709bde80a7146c74
|
3 |
+
size 844
|
Source/Data/pbp.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:840929401e41f90255f27bb2002791d75ea1aaeee538d586743044fb5065ca96
|
3 |
+
size 247394694
|
Source/Data/pbp_this_year.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca7b56d8e82fad5c40ee396ec129c95f2f213864b190078d03a8ec665a0532c6
|
3 |
+
size 405815
|
Source/Models/__init__.py
ADDED
File without changes
|
Source/Models/xgboost_ML_75.4%.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Source/Models/xgboost_OU_59.3%.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Source/Pickles/team_abbreviation_to_name.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d457e4ca669b5000d270669b963ce286a7b8ff0f7139535c7d0bd6439fddd4f
|
3 |
+
size 910
|
Source/Pickles/team_name_to_abbreviation.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fdd24bc318fde8622b827dfaa76fdbba5849d11cb61fb99bee50adcebb20fdc1
|
3 |
+
size 903
|
Source/Pickles/test_games_ML.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0b6c58108f994d1f070c6ee85bba812da57d9395646c05e6bf3cb85a16b9f51
|
3 |
+
size 7376
|
Source/Pickles/test_games_OU.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69e2be2359534720fe42752b3e983e327e4e66a0a2bfa5924d4e750db458854e
|
3 |
+
size 7354
|
Source/Pickles/train_games_ML.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d13bfdb558d5753359f56ae4f2450e36ad8b21c10e1cc5e778b786759b83c62
|
3 |
+
size 60497
|
Source/Pickles/train_games_OU.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba28c20549cb0b08e68631bbdce808399dd1ad91f190ba18f6cbfdfeee0a4467
|
3 |
+
size 60519
|
Source/Predict/__pycache__/predict.cpython-311.pyc
ADDED
Binary file (18.8 kB). View file
|
|
Source/Predict/predict.py
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import xgboost as xgb
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import pickle as pkl
|
5 |
+
import os
|
6 |
+
import requests
|
7 |
+
from bs4 import BeautifulSoup
|
8 |
+
|
9 |
+
# Resolve project directories relative to this file so the module works
# regardless of the current working directory.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

# Current-season play-by-play data, loaded once at import time and shared by
# the functions below.
file_path = os.path.join(data_directory, 'pbp_this_year.csv')
pbp = pd.read_csv(file_path, index_col=0, low_memory=False)

# get team abbreviations: full name -> abbreviation, and its inverse.
file_path = os.path.join(pickle_directory, 'team_name_to_abbreviation.pkl')
with open(file_path, 'rb') as f:
    team_name_to_abbreviation = pkl.load(f)

file_path = os.path.join(pickle_directory, 'team_abbreviation_to_name.pkl')
with open(file_path, 'rb') as f:
    team_abbreviation_to_name = pkl.load(f)
27 |
+
def get_week():
    """Scrape nfl.com's schedule page and return (week, year) as ints.

    The first <h2> on the page carries the season year as its first word and
    the week number as its last word.
    """
    request_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Dnt': '1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
    }
    response = requests.get('https://www.nfl.com/schedules/', headers=request_headers)
    page = BeautifulSoup(response.text, 'html.parser')
    heading_words = page.find_all('h2')[0].getText().split(' ')
    current_year = heading_words[0]
    current_week = heading_words[-1]
    return int(current_week), int(current_year)
46 |
+
|
47 |
+
def get_games():
    """Scrape this week's NFL schedule (away team, home team, kickoff) from NBC Sports."""
    # pull from NBC
    url = 'https://www.nbcsports.com/nfl/schedule'
    df = pd.read_html(url)[0]
    # Team cells look like '<abbrev>\xa0<full name>'; keep everything after the nbsp.
    df['Away Team'] = [' '.join(i.split('\xa0')[1:]) for i in df['Away TeamAway Team']]
    df['Home Team'] = [' '.join(i.split('\xa0')[1:]) for i in df['Home TeamHome Team']]
    df['Date'] = pd.to_datetime(df['Game TimeGame Time'])
    df['Date'] = df['Date'].dt.strftime('%A %d/%m %I:%M %p')
    # Reformat 'Weekday dd/mm hh:mm AM' into 'Weekday m/d hh:mm' (day/month
    # swapped to US order, zero-padding stripped).
    # NOTE(review): the AM/PM token (x.split()[3]) is dropped and .capitalize()
    # lower-cases everything after the first character -- confirm intended.
    df['Date'] = df['Date'].apply(lambda x: f"{x.split()[0]} {int(x.split()[1].split('/')[1])}/{int(x.split()[1].split('/')[0])} {x.split()[2]}".capitalize())

    return df[['Away Team','Home Team','Date']]
59 |
+
|
60 |
+
def get_one_week(team_name,season,week):
    """
    Build the cumulative-stats feature row for one team entering a given week.

    Derives the same per-play columns as Source/Build/build.py from the
    module-level `pbp` DataFrame, aggregates to game level with running
    averages, and returns the row where GP == week.

    NOTE(review): unlike build.py's build_gbg_data, this does NOT .shift() the
    expanding stats, so the row for `week` includes that week's own game if it
    has been played -- confirm this is the intended inference-time behavior.
    """
    # create columns (offense = posteam, defense = defteam, from this team's view)
    team = pbp.loc[((pbp['home_team']==team_name) | (pbp['away_team']==team_name)) & (pbp['season']==season)]
    team['GP'] = team['week']
    team['W'] = [1 if r>0 and team_name==h else 1 if r<0 and team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
    team['L'] = [0 if r>0 and team_name==h else 0 if r<0 and team_name==a else 1 for r,a,h in team[['result','away_team','home_team']].values]
    team['W_PCT'] = team['W']/team['GP']
    team['TOP'] = [t if team_name==p else 0 for t,p in team[['TOP_seconds','posteam']].values]
    team['FGA'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','field_goal_attempt']].values]
    team['FGM'] = [1 if team_name==p and f=='made' else 0 for p,f in team[['posteam','field_goal_result']].values]
    team['FG_PCT'] = team['FGM']/team['FGA']
    team['PassTD'] = np.where((team['posteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
    team['RushTD'] = np.where((team['posteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
    team['PassTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
    team['RushTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
    team['PassYds'] = [y if p==team_name else 0 for p,y in team[['posteam','passing_yards']].values]
    team['RushYds'] = [y if p==team_name else 0 for p,y in team[['posteam','rushing_yards']].values]
    team['PassYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','passing_yards']].values]
    team['RushYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','rushing_yards']].values]
    team['Fum'] = np.where((team['defteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
    team['Fum_Allowed'] = np.where((team['posteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
    team['INT'] = np.where((team['defteam'] == team_name) & (team['interception'] == 1), 1, 0)
    team['INT_Allowed'] = np.where((team['posteam'] == team_name) & (team['interception'] == 1), 1, 0)
    team['Sacks'] = np.where((team['defteam'] == team_name) & (team['sack'] == 1), 1, 0)
    team['Sacks_Allowed'] = np.where((team['posteam'] == team_name) & (team['sack'] == 1), 1, 0)
    team['Penalties'] = np.where((team['penalty_team'] == team_name), 1, 0)
    team['FirstDowns'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','first_down']].values]
    team['3rdDownConverted'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_converted']].values]
    team['3rdDownFailed'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_failed']].values]
    team['3rdDownAllowed'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_converted']].values]
    team['3rdDownDefended'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_failed']].values]
    team['PTS'] = [ap if at==team_name else hp if ht==team_name else None for ht,at,hp,ap in team[['home_team','away_team','home_score','away_score']].values]
    team['PointDiff'] = [r if team_name==h else -r if team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]

    # aggregate from play-by-play to game-by-game
    features = {
        'GP':'mean',
        'W':'mean',
        'L':'mean',
        'W_PCT':'mean',
        'TOP':'sum',
        'FGA':'sum',
        'FGM':'sum',
        'FG_PCT':'mean',
        'PassTD':'sum',
        'RushTD':'sum',
        'PassTD_Allowed':'sum',
        'RushTD_Allowed':'sum',
        'PassYds':'sum',
        'RushYds':'sum',
        'PassYds_Allowed':'sum',
        'RushYds_Allowed':'sum',
        'Fum':'sum',
        'Fum_Allowed':'sum',
        'INT':'sum',
        'INT_Allowed':'sum',
        'Sacks':'sum',
        'Sacks_Allowed':'sum',
        'Penalties':'sum',
        'FirstDowns':'sum',
        '3rdDownConverted':'sum',
        '3rdDownFailed':'sum',
        '3rdDownAllowed':'sum',
        '3rdDownDefended':'sum',
        'PTS':'mean',
        'PointDiff':'mean'
    }
    game = team.groupby('game_id').agg(features).reset_index()
    # Cumulative record and running averages over the season so far.
    game[['W','L']] = game[['W','L']].expanding().sum()
    game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
    game['TEAM'] = team_name
    game['Season'] = season
    return game.loc[game['GP']==week]
134 |
+
|
135 |
+
def get_one_week_home_and_away(home,away,season,week):
    """Build one merged feature row for a matchup: home-team stats joined with
    away-team stats, away columns suffixed '.Away'."""
    home_stats = get_one_week(home, season, week)
    away_stats = get_one_week(away, season, week)
    away_stats.columns = [f'{col}.Away' for col in away_stats.columns]
    merged = home_stats.merge(away_stats, left_index=True, right_index=True)
    merged.drop(columns=['TEAM', 'TEAM.Away', 'Season.Away', 'game_id.Away'], inplace=True)
    return merged
+
|
144 |
+
def predict(home,away,season,week,total):
    """
    Predict one matchup with the saved XGBoost models.

    Parameters
    ----------
    home, away : str
        Full team names (keys of team_name_to_abbreviation).
    season, week : int
        Which week's cumulative stats to use as features.
    total : float
        The closing over/under line, fed to the model as 'Total Score Close'.

    Returns
    -------
    tuple[dict, dict]
        (moneyline, over_under); on any prediction failure each falls back to
        'N/A' values with the same keys and list shapes as the success case.
    """
    # finish preparing data
    home_abbrev = team_name_to_abbreviation[home]
    away_abbrev = team_name_to_abbreviation[away]
    gbg = get_one_week_home_and_away(home_abbrev,away_abbrev,season,week)
    gbg['Total Score Close'] = total

    matrix = xgb.DMatrix(gbg.drop(columns=['game_id','Season']).astype(float).values)

    # moneyline
    model = 'xgboost_ML_75.4%'
    file_path = os.path.join(model_directory, f'{model}.json')
    xgb_ml = xgb.Booster()
    xgb_ml.load_model(file_path)
    try:
        ml_predicted_proba = xgb_ml.predict(matrix)[0][1]
        winner_proba = max([ml_predicted_proba, 1-ml_predicted_proba])
        # Only call a winner when the model is confident; otherwise 'Toss-Up'.
        moneyline = {'Winner': [home if ml_predicted_proba>0.6 else away if ml_predicted_proba<0.4 else 'Toss-Up'],
                     'Probabilities':[winner_proba]}
    # FIX: was a bare `except:` (also caught KeyboardInterrupt/SystemExit);
    # fallback now mirrors the success branch's shape ('NA' -> ['N/A']).
    except Exception:
        moneyline = {'Winner': ['N/A'],
                     'Probabilities':['N/A']}

    # over/under
    model = 'xgboost_OU_59.3%'
    file_path = os.path.join(model_directory, f'{model}.json')
    xgb_ou = xgb.Booster()
    xgb_ou.load_model(file_path)
    try:
        ou_predicted_proba = xgb_ou.predict(matrix)[0][1]
        over_under = {'Over/Under': ['Over' if ou_predicted_proba>0.5 else 'Under'],
                      'Probability': [ou_predicted_proba]}
    # FIX: fallback key was 'Probabilities', inconsistent with the success
    # branch's 'Probability' (a KeyError trap for downstream readers).
    except Exception:
        over_under = {'Over/Under': ['N/A'],
                      'Probability': ['N/A']}

    return moneyline, over_under
182 |
+
|
183 |
+
def update_past_predictions():
    """
    Re-run predict() for every game in this season's play-by-play data and
    write the results to Data/predictions_this_year.csv.
    """
    file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
    gbg_and_odds_this_year = pd.read_csv(file_path, index_col=0, low_memory=False)
    # BUG FIX: dict(df[[...]]) builds {column name: Series}, not a game_id ->
    # total mapping; .values yields the (game_id, total) pairs (same idiom as
    # game_date_dict in Source/Build/build.py).
    total_dict = dict(gbg_and_odds_this_year[['game_id','Total Score Close']].values)
    games = pbp.drop_duplicates(subset='game_id')

    predictions = {}
    for _, i in games.iterrows():
        game_id = i['game_id']
        # BUG FIX: pbp stores team abbreviations, but predict() expects full
        # names (it looks them up in team_name_to_abbreviation). Translate
        # first -- confirm the two pickles round-trip as inverses.
        home = team_abbreviation_to_name[i['home_team']]
        away = team_abbreviation_to_name[i['away_team']]
        week = i['week']
        season = i['season']
        # Games missing from the odds file get no line; predict() still runs
        # (the model just sees NaN for 'Total Score Close').
        total = total_dict.get(game_id)
        predictions[game_id] = predict(home,away,season,week,total)

    predictions_df = pd.DataFrame(predictions)
    file_path = os.path.join(data_directory, 'predictions_this_year.csv')
    predictions_df.to_csv(file_path)
|
Source/Test/__init__.py
ADDED
File without changes
|
Source/Test/xgboost_ML.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import pickle as pkl

import matplotlib.pyplot as plt
import pandas as pd
import xgboost as xgb

# Evaluate the saved moneyline model on its held-out test games and plot the
# cumulative betting return when only betting at >=60% confidence.
model = 'xgboost_ML_75.4%'

current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

# Load the trained booster.
file_path = os.path.join(model_directory, f'{model}.json')
xgb_ml = xgb.Booster()
xgb_ml.load_model(file_path)

# IDs of the games held out when the model was trained.
file_path = os.path.join(pickle_directory, 'test_games_ML.pkl')
with open(file_path, 'rb') as f:
    test_games = pkl.load(f).tolist()

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
gbg_and_odds = pd.read_csv(file_path, index_col=0)
# .copy() so the column assignments below modify a real frame rather than a
# view of gbg_and_odds (avoids SettingWithCopyWarning / silent no-ops).
test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)].copy()
# Drop identifiers, labels, and outcome/odds columns before building the matrix.
test_data_matrix = xgb.DMatrix(
    test_data.drop(columns=['game_id', 'Over', 'Home-Team-Win', 'Season',
                            'home_team', 'away_team', 'game_date', 'Key',
                            'Home Score', 'Away Score', 'Home Odds Close',
                            'Away Odds Close', 'Home Winnings', 'Away Winnings'])
    .astype(float).values)

predicted_probas = xgb_ml.predict(test_data_matrix)
test_data['predicted_proba'] = predicted_probas[:, 1]  # P(home team wins)
test_data['prediction'] = (test_data['predicted_proba'] > 0.5).astype(int)
test_data['correct'] = test_data['Home-Team-Win'] == test_data['prediction']

# Only bet games where the model is at least 60% confident either way.
bets = test_data.loc[(test_data['predicted_proba'] > 0.6) |
                     (test_data['predicted_proba'] < 0.4)].copy()
# A correct call earns the home payout, a wrong one the away payout
# (the Winnings columns are signed, so losses are already negative).
bets['winnings'] = [h if c else a
                    for h, a, c in bets[['Home Winnings', 'Away Winnings', 'correct']].values]

# Dark-themed cumulative-return plot.
fig = plt.figure(facecolor='black')
ax = fig.add_subplot(1, 1, 1, facecolor='black')

# Plot data with line color as RGB(0, 128, 0)
ax.plot(bets['winnings'].cumsum().values * 100, linewidth=3,
        color=(0 / 255, 128 / 255, 0 / 255))

# Set title and labels
ax.set_title('MARCI 3.0 - MoneyLine w/ 60% Confidence Threshold', color='white')
ax.set_xlabel('Games Bet On', color='white')
ax.set_ylabel('Return (%)', color='white')

# Change tick colors to white
ax.tick_params(axis='x', colors='white')
ax.tick_params(axis='y', colors='white')

# Change axis edge colors
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_color('white')

plt.savefig(f'{model}_dark.png', facecolor='black')
|
Source/Test/xgboost_ML_75.4%.png
ADDED
Source/Test/xgboost_ML_75.4%_dark.png
ADDED
Source/Test/xgboost_OU.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import pickle as pkl

import matplotlib.pyplot as plt
import pandas as pd
import xgboost as xgb

# Evaluate the saved over/under model on its held-out test games and plot the
# cumulative betting return assuming a flat -110-style payout (+0.91 / -1).
model = 'xgboost_OU_59.3%'

current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

# Load the trained booster.
file_path = os.path.join(model_directory, f'{model}.json')
xgb_ou = xgb.Booster()
xgb_ou.load_model(file_path)

# IDs of the games held out when the model was trained.
file_path = os.path.join(pickle_directory, 'test_games_OU.pkl')
with open(file_path, 'rb') as f:
    test_games = pkl.load(f).tolist()

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
gbg_and_odds = pd.read_csv(file_path, index_col=0)
# .copy() so the column assignments below modify a real frame rather than a
# view of gbg_and_odds (avoids SettingWithCopyWarning / silent no-ops).
test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)].copy()
# Drop identifiers, labels, and outcome/odds columns before building the matrix.
test_data_matrix = xgb.DMatrix(
    test_data.drop(columns=['game_id', 'Over', 'Home-Team-Win', 'Season',
                            'home_team', 'away_team', 'game_date', 'Key',
                            'Home Score', 'Away Score', 'Home Odds Close',
                            'Away Odds Close', 'Home Winnings', 'Away Winnings'])
    .astype(float).values)

predicted_probas = xgb_ou.predict(test_data_matrix)
test_data['predicted_proba'] = predicted_probas[:, 1]  # P(over)
test_data['prediction'] = (test_data['predicted_proba'] > 0.5).astype(int)
test_data['correct'] = test_data['Over'] == test_data['prediction']

# Bet every test game (the confidence-threshold filter is deliberately
# disabled for the over/under model).
bets = test_data
# Iterate the boolean Series directly; the old bets[['correct']].values form
# leaned on deprecated truthiness of single-element numpy arrays.
bets['winnings'] = [0.91 if c else -1 for c in bets['correct']]

# Dark-themed cumulative-return plot.
fig = plt.figure(facecolor='black')
ax = fig.add_subplot(1, 1, 1, facecolor='black')

# Plot data with line color as RGB(0, 128, 0)
ax.plot(bets['winnings'].cumsum().values * 100, linewidth=3,
        color=(0 / 255, 128 / 255, 0 / 255))

# Set title and labels
ax.set_title('MARCI 3.0 - Over/Under', color='white')
ax.set_xlabel('Games Bet On', color='white')
ax.set_ylabel('Return (%)', color='white')

# Change tick colors to white
ax.tick_params(axis='x', colors='white')
ax.tick_params(axis='y', colors='white')

# Change axis edge colors
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_color('white')

plt.savefig(f'{model}_dark.png', facecolor='black')
|
Source/Test/xgboost_OU_59.3%.png
ADDED
Source/Test/xgboost_OU_59.3%_dark.png
ADDED
Source/Train/xgboost_ML.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import pickle as pkl

import numpy as np
import pandas as pd
import xgboost as xgb
from IPython.display import clear_output
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Train the moneyline (home-team-win) classifier over 100 random train/test
# splits, persisting the split and model only when accuracy beats all
# previous runs.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
data = pd.read_csv(file_path, index_col=0).dropna()

# Target: 1 if the home team won.  Drop the labels plus every column that
# would leak the game's outcome or closing odds into the features.
margin = data['Home-Team-Win']
data.drop(columns=['Home-Team-Win', 'Over', 'Season', 'home_team', 'away_team',
                   'game_date', 'Key', 'Home Score', 'Away Score',
                   'Home Odds Close', 'Away Odds Close',
                   'Home Winnings', 'Away Winnings'], inplace=True)

acc_results = []

for _ in tqdm(range(100)):
    X_train, X_test, y_train, y_test = train_test_split(data, margin, test_size=.1)

    # Remember which games landed in each split (so the held-out set can be
    # reconstructed later), then drop the identifier before training.
    train_games = X_train['game_id']
    test_games = X_test['game_id']
    X_train = X_train.drop(columns=['game_id'])
    X_test = X_test.drop(columns=['game_id'])

    train = xgb.DMatrix(X_train.astype(float).values, label=y_train)
    test = xgb.DMatrix(X_test.astype(float).values, label=y_test)

    param = {
        'max_depth': 2,
        'eta': 0.01,
        'objective': 'multi:softprob',
        'num_class': 2
    }
    epochs = 500

    model = xgb.train(param, train, epochs)
    # softprob yields one probability per class; the prediction is the argmax.
    predicted_probas = model.predict(test)
    y = np.argmax(predicted_probas, axis=1)

    acc = round(accuracy_score(y_test, y) * 100, 1)
    acc_results.append(acc)
    clear_output(wait=True)
    print(f"Best accuracy: {max(acc_results)}%")

    # Only save results if they are the best so far.
    if acc == max(acc_results):
        file_path = os.path.join(pickle_directory, 'train_games_ML.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(train_games, f)

        file_path = os.path.join(pickle_directory, 'test_games_ML.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(test_games, f)

        file_path = os.path.join(model_directory, f'xgboost_ML_{acc}%.json')
        model.save_model(file_path)

print('Done')
|
Source/Train/xgboost_OU.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import pickle as pkl

import numpy as np
import pandas as pd
import xgboost as xgb
from IPython.display import clear_output
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Train the over/under classifier over 100 random train/test splits,
# persisting the split and model only when accuracy beats all previous runs.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
data = pd.read_csv(file_path, index_col=0).dropna()

# Target: the 'Over' label.  Drop the labels plus every column that would
# leak the game's outcome or closing odds into the features.
OU = data['Over']
data.drop(columns=['Home-Team-Win', 'Over', 'Season', 'home_team', 'away_team',
                   'game_date', 'Key', 'Home Score', 'Away Score',
                   'Home Odds Close', 'Away Odds Close',
                   'Home Winnings', 'Away Winnings'], inplace=True)

acc_results = []

for _ in tqdm(range(100)):
    X_train, X_test, y_train, y_test = train_test_split(data, OU, test_size=.1)

    # Remember which games landed in each split (so the held-out set can be
    # reconstructed later), then drop the identifier before training.
    train_games = X_train['game_id']
    test_games = X_test['game_id']
    X_train = X_train.drop(columns=['game_id'])
    X_test = X_test.drop(columns=['game_id'])

    train = xgb.DMatrix(X_train.astype(float).values, label=y_train)
    test = xgb.DMatrix(X_test.astype(float).values, label=y_test)

    param = {
        'max_depth': 6,
        'eta': 0.05,
        'objective': 'multi:softprob',
        # NOTE(review): downstream code only reads class-1 probability, which
        # suggests a binary label; num_class=3 presumably reserves a class for
        # pushes (total lands exactly on the line) — confirm against the data.
        'num_class': 3
    }
    epochs = 300

    model = xgb.train(param, train, epochs)
    # softprob yields one probability per class; the prediction is the argmax.
    predicted_probas = model.predict(test)
    y = np.argmax(predicted_probas, axis=1)

    acc = round(accuracy_score(y_test, y) * 100, 1)
    acc_results.append(acc)
    clear_output(wait=True)
    print(f"Best accuracy: {max(acc_results)}%")

    # Only save results if they are the best so far.
    if acc == max(acc_results):
        file_path = os.path.join(pickle_directory, 'train_games_OU.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(train_games, f)

        file_path = os.path.join(pickle_directory, 'test_games_OU.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(test_games, f)

        file_path = os.path.join(model_directory, f'xgboost_OU_{acc}%.json')
        model.save_model(file_path)

print('Done')
|
Static/Arizona Cardinals.webp
ADDED
Static/Atlanta Falcons.webp
ADDED
Static/Baltimore Ravens.webp
ADDED
Static/Buffalo Bills.webp
ADDED
Static/Carolina Panthers.webp
ADDED
Static/Chicago Bears.webp
ADDED
Static/Cincinnati Bengals.webp
ADDED
Static/Cleveland Browns.webp
ADDED
Static/Dallas Cowboys.webp
ADDED
Static/Denver Broncos.webp
ADDED
Static/Detroit Lions.webp
ADDED
Static/Green Bay Packers.webp
ADDED
Static/Houston Texans.webp
ADDED
Static/Indianapolis Colts.webp
ADDED
Static/Jacksonville Jaguars.webp
ADDED
Static/Kansas City Chiefs.webp
ADDED
Static/Las Vegas Raiders.webp
ADDED