BraydenMoore committed on
Commit
7727a49
•
1 Parent(s): c34b18c

Fix build methods

Browse files
Source/Build/__pycache__/build.cpython-311.pyc CHANGED
Binary files a/Source/Build/__pycache__/build.cpython-311.pyc and b/Source/Build/__pycache__/build.cpython-311.pyc differ
 
Source/Build/build.py CHANGED
@@ -1,4 +1,4 @@
1
- import nfl_data_py.nfl_data_py as nfl
2
  from tqdm import tqdm
3
  import numpy as np
4
  import pandas as pd
@@ -11,7 +11,11 @@ current_directory = os.path.dirname(os.path.abspath(__file__))
11
  parent_directory = os.path.dirname(current_directory)
12
  data_directory = os.path.join(parent_directory, 'Data')
13
 
14
- def get_pbp_data(get_seasons=[], overwrite_seasons=[]):
 
 
 
 
15
  """
16
  Pull data from nflFastR's Github repo.
17
  If you choose to overwrite, it will replace the existing pbp data with the data you pull.
@@ -19,44 +23,19 @@ def get_pbp_data(get_seasons=[], overwrite_seasons=[]):
19
  """
20
  pbp = nfl.import_pbp_data(get_seasons)
21
  pbp['TOP_seconds'] = pbp['drive_time_of_possession'].apply(lambda x: int(x.split(':')[0]) * 60 + int(x.split(':')[1]) if pd.notnull(x) else 0)
22
-
23
- if overwrite_seasons:
24
- file_path = os.path.join(data_directory, 'pbp.csv')
25
- old = pd.read_csv(file_path, index_col=0, low_memory=False)
26
- old = old.loc[~old['season'].isin(overwrite_seasons)]
27
- pbp = pd.concat([old,pbp])
28
- pbp.to_csv(file_path)
29
-
30
- year = dt.datetime.now().year
31
- month = dt.datetime.now().month
32
- season = year if month in [8,9,10,11,12] else year-1
33
- pbp_this_year = pbp.loc[pbp['season']==season]
34
- file_path = os.path.join(data_directory, 'pbp_this_year.csv')
35
- pbp_this_year.to_csv(file_path)
36
 
37
  return pbp
38
 
39
 
40
- def build_gbg_data(get_seasons=[], overwrite_seasons=[]):
41
  """
42
- Using pbp.csv, build a game-by-game dataset to use for prediction models.
43
- Populate update_seasons with the current year to only update this season's data while preserving historical data.
44
 
45
  """
46
  print('Loading play-by-play data.')
47
-
48
- if overwrite_seasons:
49
- print('Overwriting data for', overwrite_seasons)
50
- pbp = get_pbp_data(get_seasons, overwrite_seasons)
51
-
52
- if not overwrite_seasons:
53
- file_path = os.path.join(data_directory, 'pbp.csv')
54
- pbp = pd.read_csv(file_path, index_col=0)
55
-
56
- pbp = pbp.loc[pbp['season'].isin(get_seasons)]
57
  game_date_dict = dict(pbp[['game_id','game_date']].values)
58
  teams = list(set(list(pbp['home_team'].unique()) + list(pbp['away_team'].unique())))
59
- print(teams)
60
  seasons = pbp['season'].unique()
61
 
62
  print('Building game-by-game data.')
@@ -134,9 +113,14 @@ def build_gbg_data(get_seasons=[], overwrite_seasons=[]):
134
  game = team.groupby('game_id').agg(features).reset_index()
135
  game[['W','L']] = game[['W','L']].expanding().sum()
136
  game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
137
- game[game.columns[1:]] = game[game.columns[1:]].shift()
138
- game['TEAM'] = team_name
139
- game['Season'] = season
 
 
 
 
 
140
 
141
  data = pd.concat([data,game])
142
 
@@ -149,25 +133,20 @@ def build_gbg_data(get_seasons=[], overwrite_seasons=[]):
149
  gbg.drop(columns=['TEAM','TEAM.Away','home_team.Away','away_team.Away','Season.Away','game_id.Away'], inplace=True)
150
  gbg['game_date'] = gbg['game_id'].map(game_date_dict)
151
 
152
- if overwrite_seasons:
153
- file_path = os.path.join(data_directory, 'gbg.csv')
154
- old = pd.read_csv(file_path, index_col=0, low_memory=False)
155
- old = old.loc[~old['Season'].isin(overwrite_seasons)]
156
- gbg = pd.concat([old,gbg])
157
- file_path = os.path.join(data_directory, 'gbg.csv')
158
- gbg.to_csv(file_path)
159
-
160
- year = dt.datetime.now().year
161
- month = dt.datetime.now().month
162
- season = year if month in [8,9,10,11,12] else year-1
163
- gbg_this_year = gbg.loc[gbg['Season']==season]
164
  file_path = os.path.join(data_directory, 'gbg_this_year.csv')
165
- gbg_this_year.to_csv(file_path)
166
 
167
- return gbg
 
 
 
 
168
 
169
 
170
- def add_odds_data(gbg, overwrite=False):
171
  """
172
  Get odds from Australian Sports Betting's free online dataset and merge it with game-by-game data.
173
 
@@ -192,15 +171,27 @@ def add_odds_data(gbg, overwrite=False):
192
  odds['Home Winnings'] = [ho-1 if h>a else -1 if a>h else 0 for ho,h,a in odds[['Home Odds Close','Home Score','Away Score']].values]
193
  odds['Away Winnings'] = [ao-1 if a>h else -1 if h>a else 0 for ao,h,a in odds[['Away Odds Close','Home Score','Away Score']].values]
194
 
195
- # merge with gbg
196
- gbg['Key'] = gbg['game_date'].astype(str) + gbg['home_team'] + gbg['away_team']
197
- gbg_and_odds = gbg.merge(odds, left_on='Key', right_on='Key')
198
- gbg_and_odds['Home-Team-Win'] = (gbg_and_odds['Home Score']>gbg_and_odds['Away Score']).astype(int)
199
- gbg_and_odds['Over'] = ((gbg_and_odds['Home Score'] + gbg_and_odds['Away Score'])>gbg_and_odds['Total Score Close']).astype(int)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
- if overwrite:
202
- file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
203
- gbg_and_odds.to_csv(file_path)
204
-
205
- return gbg_and_odds
206
 
 
1
+ from nfl_data_py import nfl_data_py as nfl
2
  from tqdm import tqdm
3
  import numpy as np
4
  import pandas as pd
 
11
  parent_directory = os.path.dirname(current_directory)
12
  data_directory = os.path.join(parent_directory, 'Data')
13
 
14
+ year = dt.datetime.now().year
15
+ month = dt.datetime.now().month
16
+ current_season = year if month in [8,9,10,11,12] else year-1
17
+
18
+ def get_pbp_data(get_seasons=[]):
19
  """
20
  Pull data from nflFastR's Github repo.
21
  If you choose to overwrite, it will replace the existing pbp data with the data you pull.
 
23
  """
24
  pbp = nfl.import_pbp_data(get_seasons)
25
  pbp['TOP_seconds'] = pbp['drive_time_of_possession'].apply(lambda x: int(x.split(':')[0]) * 60 + int(x.split(':')[1]) if pd.notnull(x) else 0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  return pbp
28
 
29
 
30
+ def build_gbg_data(get_seasons=[]):
31
  """
32
+ Build a game-by-game dataset to use for prediction models.
 
33
 
34
  """
35
  print('Loading play-by-play data.')
36
+ pbp = get_pbp_data(get_seasons)
 
 
 
 
 
 
 
 
 
37
  game_date_dict = dict(pbp[['game_id','game_date']].values)
38
  teams = list(set(list(pbp['home_team'].unique()) + list(pbp['away_team'].unique())))
 
39
  seasons = pbp['season'].unique()
40
 
41
  print('Building game-by-game data.')
 
113
  game = team.groupby('game_id').agg(features).reset_index()
114
  game[['W','L']] = game[['W','L']].expanding().sum()
115
  game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
116
+
117
+ if season != current_season:
118
+ game[game.columns[1:]] = game[game.columns[1:]].shift()
119
+ game['TEAM'] = team_name
120
+ game['Season'] = season
121
+ else:
122
+ game['TEAM'] = team_name
123
+ game['Season'] = season
124
 
125
  data = pd.concat([data,game])
126
 
 
133
  gbg.drop(columns=['TEAM','TEAM.Away','home_team.Away','away_team.Away','Season.Away','game_id.Away'], inplace=True)
134
  gbg['game_date'] = gbg['game_id'].map(game_date_dict)
135
 
136
+ # save current data
137
+ if current_season in get_seasons:
138
+ gbg_this_year = gbg.loc[gbg['Season']==current_season]
 
 
 
 
 
 
 
 
 
139
  file_path = os.path.join(data_directory, 'gbg_this_year.csv')
140
+ gbg_this_year.to_csv(file_path, index=False)
141
 
142
+ # save historical data
143
+ if get_seasons != [current_season]:
144
+ gbg = gbg.loc[gbg['Season']!=current_season]
145
+ file_path = os.path.join(data_directory, 'gbg.csv')
146
+ gbg.to_csv(file_path, index=False)
147
 
148
 
149
+ def add_odds_data():
150
  """
151
  Get odds from Australian Sports Betting's free online dataset and merge it with game-by-game data.
152
 
 
171
  odds['Home Winnings'] = [ho-1 if h>a else -1 if a>h else 0 for ho,h,a in odds[['Home Odds Close','Home Score','Away Score']].values]
172
  odds['Away Winnings'] = [ao-1 if a>h else -1 if h>a else 0 for ao,h,a in odds[['Away Odds Close','Home Score','Away Score']].values]
173
 
174
+ # load gbg data
175
+ file_path = os.path.join(data_directory, 'gbg.csv')
176
+ gbg = pd.read_csv(file_path)
177
+ file_path = os.path.join(data_directory, 'gbg_this_year.csv')
178
+ gbg_this_year = pd.read_csv(file_path)
179
+
180
+ # merge and save
181
+ dataframes = [gbg, gbg_this_year]
182
+ for idx in range(2):
183
+ i = dataframes[idx]
184
+ i['Key'] = i['game_date'].astype(str) + i['home_team'] + i['away_team']
185
+ gbg_and_odds = i.merge(odds, left_on='Key', right_on='Key')
186
+ gbg_and_odds['Home-Team-Win'] = (gbg_and_odds['Home Score']>gbg_and_odds['Away Score']).astype(int)
187
+ gbg_and_odds['Over'] = ((gbg_and_odds['Home Score'] + gbg_and_odds['Away Score'])>gbg_and_odds['Total Score Close']).astype(int)
188
+
189
+ if idx==0:
190
+ file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
191
+ else:
192
+ file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
193
+
194
+ gbg_and_odds.to_csv(file_path, index=False)
195
 
196
+
 
 
 
 
197
 
Source/Build/update.py CHANGED
@@ -14,12 +14,9 @@ data_directory = os.path.join(parent_directory, 'Data')
14
  # get current season
15
  year = dt.datetime.now().year
16
  month = dt.datetime.now().month
17
- season = year if month in [8,9,10,11,12] else year-1
18
 
19
  # update current season
20
- gbg = build.build_gbg_data(get_seasons=[2023], overwrite_seasons=[2023])
21
- gbg_and_odds = build.add_odds_data(gbg)
22
- gbg_and_odds_this_year = gbg_and_odds.loc[gbg_and_odds['Season']==season]
23
 
24
- file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
25
- gbg_and_odds_this_year.to_csv(file_path)
 
14
  # get current season
15
  year = dt.datetime.now().year
16
  month = dt.datetime.now().month
17
+ current_season = year if month in [8,9,10,11,12] else year-1
18
 
19
  # update current season
20
+ build.build_gbg_data(get_seasons=[current_season])
21
+ build.add_odds_data()
 
22
 
 
 
Source/Data/gbg.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:518ee58f264900f457b6ab0deed9a664607c16bf399fa2a669fc484244c57a92
3
- size 1792121
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87ac3980c207a257b4a0d006502ce2a79ea40e72107b8ab76f9f6114209bc6ff
3
+ size 1613287
Source/Data/gbg_and_odds.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8ec2d7b26b490e1c28de9f9c40b4b4991f6f1ff7bbad0f3e994a7c5c375affe
3
- size 1567692
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb55eaa75e12c6cffdb65601f2a48b5c09850dd5b42daa0415a2ce9896417eb6
3
+ size 1558685
Source/Models/xgboost_ML_no_odds_69.8%.json ADDED
The diff for this file is too large to render. See raw diff
 
Source/Models/xgboost_OU_no_odds_60.8%.json ADDED
The diff for this file is too large to render. See raw diff
 
Source/{Data/pbp_this_year.csv → Pickles/test_games_ML_no_odds.pkl} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f2e83db244f5d35254e993e424c95307bd148dc7d7676a51b9d02d166219bf4
3
- size 442403
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21a7279e8e37177aa8ab9ef8c19b9e3ad24a804be91708e9d01ffe2a44e16c7e
3
+ size 7361
Source/{Data/pbp.csv → Pickles/test_games_OU_no_odds.pkl} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:840929401e41f90255f27bb2002791d75ea1aaeee538d586743044fb5065ca96
3
- size 247394694
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a8c3ab6053d07713983136f10908961726a1c2039bde108c1537b96c980cf5
3
+ size 7347
Source/Pickles/train_games_ML_no_odds.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58f338cc969ad0cdc5baf333e56eab4bc0481acc7f07bee2bb0786442eca2617
3
+ size 60512
Source/Pickles/train_games_OU_no_odds.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b867b522859680b70670d58d2cb55ca3641b1bcf53bb04f15d9604810a4c7c37
3
+ size 60526
Source/Predict/__pycache__/predict.cpython-311.pyc CHANGED
Binary files a/Source/Predict/__pycache__/predict.cpython-311.pyc and b/Source/Predict/__pycache__/predict.cpython-311.pyc differ
 
Source/Predict/predict.py CHANGED
@@ -12,8 +12,8 @@ data_directory = os.path.join(parent_directory, 'Data')
12
  model_directory = os.path.join(parent_directory, 'Models')
13
  pickle_directory = os.path.join(parent_directory, 'Pickles')
14
 
15
- file_path = os.path.join(data_directory, 'pbp_this_year.csv')
16
- pbp = pd.read_csv(file_path, index_col=0, low_memory=False)
17
 
18
  # get team abbreviations
19
  file_path = os.path.join(pickle_directory, 'team_name_to_abbreviation.pkl')
@@ -57,101 +57,33 @@ def get_games():
57
  return df[['Away Team','Home Team','Date']]
58
 
59
 
60
- def get_one_week(team_name,season,week):
61
- # create columns
62
- team = pbp.loc[((pbp['home_team']==team_name) | (pbp['away_team']==team_name)) & (pbp['season']==season)]
63
- team['GP'] = team['week']
64
- team['W'] = [1 if r>0 and team_name==h else 1 if r<0 and team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
65
- team['L'] = [0 if r>0 and team_name==h else 0 if r<0 and team_name==a else 1 for r,a,h in team[['result','away_team','home_team']].values]
66
- team['W_PCT'] = team['W']/team['GP']
67
- team['TOP'] = [t if team_name==p else 0 for t,p in team[['TOP_seconds','posteam']].values]
68
- team['FGA'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','field_goal_attempt']].values]
69
- team['FGM'] = [1 if team_name==p and f=='made' else 0 for p,f in team[['posteam','field_goal_result']].values]
70
- team['FG_PCT'] = team['FGM']/team['FGA']
71
- team['PassTD'] = np.where((team['posteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
72
- team['RushTD'] = np.where((team['posteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
73
- team['PassTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
74
- team['RushTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
75
- team['PassYds'] = [y if p==team_name else 0 for p,y in team[['posteam','passing_yards']].values]
76
- team['RushYds'] = [y if p==team_name else 0 for p,y in team[['posteam','rushing_yards']].values]
77
- team['PassYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','passing_yards']].values]
78
- team['RushYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','rushing_yards']].values]
79
- team['Fum'] = np.where((team['defteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
80
- team['Fum_Allowed'] = np.where((team['posteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
81
- team['INT'] = np.where((team['defteam'] == team_name) & (team['interception'] == 1), 1, 0)
82
- team['INT_Allowed'] = np.where((team['posteam'] == team_name) & (team['interception'] == 1), 1, 0)
83
- team['Sacks'] = np.where((team['defteam'] == team_name) & (team['sack'] == 1), 1, 0)
84
- team['Sacks_Allowed'] = np.where((team['posteam'] == team_name) & (team['sack'] == 1), 1, 0)
85
- team['Penalties'] = np.where((team['penalty_team'] == team_name), 1, 0)
86
- team['FirstDowns'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','first_down']].values]
87
- team['3rdDownConverted'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_converted']].values]
88
- team['3rdDownFailed'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_failed']].values]
89
- team['3rdDownAllowed'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_converted']].values]
90
- team['3rdDownDefended'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_failed']].values]
91
- team['PTS'] = [ap if at==team_name else hp if ht==team_name else None for ht,at,hp,ap in team[['home_team','away_team','home_score','away_score']].values]
92
- team['PointDiff'] = [r if team_name==h else -r if team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
93
-
94
- # aggregate from play-by-play to game-by-game
95
- features = {
96
- 'GP':'mean',
97
- 'W':'mean',
98
- 'L':'mean',
99
- 'W_PCT':'mean',
100
- 'TOP':'sum',
101
- 'FGA':'sum',
102
- 'FGM':'sum',
103
- 'FG_PCT':'mean',
104
- 'PassTD':'sum',
105
- 'RushTD':'sum',
106
- 'PassTD_Allowed':'sum',
107
- 'RushTD_Allowed':'sum',
108
- 'PassYds':'sum',
109
- 'RushYds':'sum',
110
- 'PassYds_Allowed':'sum',
111
- 'RushYds_Allowed':'sum',
112
- 'Fum':'sum',
113
- 'Fum_Allowed':'sum',
114
- 'INT':'sum',
115
- 'INT_Allowed':'sum',
116
- 'Sacks':'sum',
117
- 'Sacks_Allowed':'sum',
118
- 'Penalties':'sum',
119
- 'FirstDowns':'sum',
120
- '3rdDownConverted':'sum',
121
- '3rdDownFailed':'sum',
122
- '3rdDownAllowed':'sum',
123
- '3rdDownDefended':'sum',
124
- 'PTS':'mean',
125
- 'PointDiff':'mean'
126
- }
127
- game = team.groupby('game_id').agg(features).reset_index()
128
- game[['W','L']] = game[['W','L']].expanding().sum()
129
- game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
130
- game['TEAM'] = team_name
131
- game['Season'] = season
132
- return game.loc[game['GP']==week-1]
133
 
 
 
 
134
 
135
- def get_one_week_home_and_away(home,away,season,week):
136
- home = get_one_week(home,season,week)
137
- away = get_one_week(away,season,week)
138
- away.columns = [f'{i}.Away' for i in away.columns]
139
- gbg = home.merge(away,left_index=True,right_index=True)
140
- gbg.drop(columns=['TEAM','TEAM.Away','Season.Away','game_id.Away'], inplace=True)
141
- return gbg.fillna(0)
142
 
143
 
144
  def predict(home,away,season,week,total):
145
  # finish preparing data
146
  home_abbrev = team_name_to_abbreviation[home]
147
  away_abbrev = team_name_to_abbreviation[away]
148
- gbg = get_one_week_home_and_away(home_abbrev,away_abbrev,season,week)
149
- gbg['Total Score Close'] = total
150
- print(gbg)
151
- matrix = xgb.DMatrix(gbg.drop(columns=['game_id','Season']).astype(float).values)
152
 
153
  # moneyline
154
- model = 'xgboost_ML_75.4%'
155
  file_path = os.path.join(model_directory, f'{model}.json')
156
  xgb_ml = xgb.Booster()
157
  xgb_ml.load_model(file_path)
@@ -166,7 +98,7 @@ def predict(home,away,season,week,total):
166
  'Probabilities':['N/A']}
167
 
168
  # over/under
169
- model = 'xgboost_OU_59.3%'
170
  file_path = os.path.join(model_directory, f'{model}.json')
171
  xgb_ou = xgb.Booster()
172
  xgb_ou.load_model(file_path)
@@ -179,25 +111,6 @@ def predict(home,away,season,week,total):
179
  over_under = {'Over/Under': 'N/A',
180
  'Probability': ['N/A']}
181
 
182
- return moneyline, over_under
183
-
184
-
185
- def update_past_predictions():
186
- file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
187
- gbg_and_odds_this_year = pd.read_csv(file_path, index_col=0, low_memory=False)
188
- total_dict = dict(gbg_and_odds_this_year[['game_id','Total Score Close']])
189
- games = pbp.drop_duplicates(subset='game_id')
190
-
191
- predictions = {}
192
- for _, i in games.iterrows():
193
- game_id = i['game_id']
194
- home = i['home_team']
195
- away = i['away_team']
196
- week = i['week']
197
- season = i['season']
198
- total = total_dict[game_id]
199
- predictions[game_id] = predict(home,away,season,week,total)
200
-
201
- predictions_df = pd.DataFrame(predictions)
202
- file_path = os.path.join(data_directory, 'predictions_this_year.csv')
203
- predictions_df.to_csv(file_path)
 
12
  model_directory = os.path.join(parent_directory, 'Models')
13
  pickle_directory = os.path.join(parent_directory, 'Pickles')
14
 
15
+ file_path = os.path.join(data_directory, 'gbg_this_year.csv')
16
+ gbg = pd.read_csv(file_path, low_memory=False)
17
 
18
  # get team abbreviations
19
  file_path = os.path.join(pickle_directory, 'team_name_to_abbreviation.pkl')
 
57
  return df[['Away Team','Home Team','Date']]
58
 
59
 
60
+ def get_one_week(home,away,season,week):
61
+ try:
62
+ home_df = gbg.loc[((gbg['away_team']==home) | (gbg['home_team']==home)) & (gbg['Season']==season) & (gbg['GP']==week-1)]
63
+ home_df = home_df[[i for i in home_df.columns if '.Away' not in i] if home_df['home_team'].item()==home else [i for i in home_df.columns if '.Away' in i]]
64
+ home_df.columns = [i.replace('.Away','') for i in home_df.columns]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
+ away_df = gbg.loc[((gbg['away_team']==away) | (gbg['home_team']==away)) & (gbg['Season']==season) & (gbg['GP']==week-1)]
67
+ away_df = away_df[[i for i in away_df.columns if '.Away' not in i] if away_df['home_team'].item()==away else [i for i in away_df.columns if '.Away' in i]]
68
+ away_df.columns = [i.replace('.Away','') + '.Away' for i in away_df.columns]
69
 
70
+ drop_columns = ['game_id', 'Season', 'home_team', 'away_team', 'game_date']
71
+ df = home_df.merge(away_df, left_on='GP', right_on='GP.Away').drop(columns=drop_columns)
72
+ return df
73
+ except ValueError:
74
+ return pd.DataFrame()
 
 
75
 
76
 
77
  def predict(home,away,season,week,total):
78
  # finish preparing data
79
  home_abbrev = team_name_to_abbreviation[home]
80
  away_abbrev = team_name_to_abbreviation[away]
81
+ data = get_one_week(home_abbrev,away_abbrev,season,week)
82
+ data['Total Score Close'] = total
83
+ matrix = xgb.DMatrix(data.astype(float).values)
 
84
 
85
  # moneyline
86
+ model = 'xgboost_ML_no_odds_69.8%'
87
  file_path = os.path.join(model_directory, f'{model}.json')
88
  xgb_ml = xgb.Booster()
89
  xgb_ml.load_model(file_path)
 
98
  'Probabilities':['N/A']}
99
 
100
  # over/under
101
+ model = 'xgboost_OU_no_odds_60.8%'
102
  file_path = os.path.join(model_directory, f'{model}.json')
103
  xgb_ou = xgb.Booster()
104
  xgb_ou.load_model(file_path)
 
111
  over_under = {'Over/Under': 'N/A',
112
  'Probability': ['N/A']}
113
 
114
+ # create game id to save predictions
115
+ game_id = str(season) + '_' + str(week) + '_' + away + '_' + home
116
+ return game_id, moneyline, over_under
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Source/Test/xgboost_ML.py CHANGED
@@ -4,7 +4,7 @@ import pickle as pkl
4
  import numpy as np
5
  import os
6
 
7
- model = 'xgboost_ML_75.4%'
8
 
9
  current_directory = os.path.dirname(os.path.abspath(__file__))
10
  parent_directory = os.path.dirname(current_directory)
@@ -16,14 +16,14 @@ file_path = os.path.join(model_directory, f'{model}.json')
16
  xgb_ml = xgb.Booster()
17
  xgb_ml.load_model(file_path)
18
 
19
- file_path = os.path.join(pickle_directory, 'test_games_ML.pkl')
20
  with open(file_path,'rb') as f:
21
  test_games = pkl.load(f).tolist()
22
 
23
  file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
24
- gbg_and_odds = pd.read_csv(file_path, index_col=0)
25
  test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)]
26
- test_data_matrix = xgb.DMatrix(test_data.drop(columns=['game_id','Over','Home-Team-Win','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings']).astype(float).values)
27
 
28
  predicted_probas = xgb_ml.predict(test_data_matrix)
29
  predictions = np.argmax(predicted_probas, axis=1)
@@ -32,7 +32,7 @@ test_data['prediction'] = (test_data['predicted_proba']>0.5).astype(int)
32
  test_data['correct'] = test_data['Home-Team-Win']==test_data['prediction']
33
 
34
  bets = test_data.loc[(test_data['predicted_proba']>0.6) | (test_data['predicted_proba']<0.4)]
35
- bets['winnings'] = [h if c else a for h,a,c in bets[['Home Winnings','Away Winnings','correct']].values]
36
 
37
  import matplotlib.pyplot as plt
38
  fig = plt.figure(facecolor='black')
 
4
  import numpy as np
5
  import os
6
 
7
+ model = 'xgboost_ML_no_odds_69.8%'
8
 
9
  current_directory = os.path.dirname(os.path.abspath(__file__))
10
  parent_directory = os.path.dirname(current_directory)
 
16
  xgb_ml = xgb.Booster()
17
  xgb_ml.load_model(file_path)
18
 
19
+ file_path = os.path.join(pickle_directory, 'test_games_ML_no_odds.pkl')
20
  with open(file_path,'rb') as f:
21
  test_games = pkl.load(f).tolist()
22
 
23
  file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
24
+ gbg_and_odds = pd.read_csv(file_path)
25
  test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)]
26
+ test_data_matrix = xgb.DMatrix(test_data.drop(columns=['game_id','Over','Home-Team-Win','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings','Away Odds','Home Odds']).astype(float).values)
27
 
28
  predicted_probas = xgb_ml.predict(test_data_matrix)
29
  predictions = np.argmax(predicted_probas, axis=1)
 
32
  test_data['correct'] = test_data['Home-Team-Win']==test_data['prediction']
33
 
34
  bets = test_data.loc[(test_data['predicted_proba']>0.6) | (test_data['predicted_proba']<0.4)]
35
+ bets['winnings'] = [h if p==1 else a for h,a,p in bets[['Home Winnings','Away Winnings','prediction']].values]
36
 
37
  import matplotlib.pyplot as plt
38
  fig = plt.figure(facecolor='black')
Source/Test/xgboost_ML_no_odds_69.8%_dark.png ADDED
Source/Test/xgboost_OU.py CHANGED
@@ -4,7 +4,7 @@ import pickle as pkl
4
  import numpy as np
5
  import os
6
 
7
- model = 'xgboost_OU_59.3%'
8
 
9
  current_directory = os.path.dirname(os.path.abspath(__file__))
10
  parent_directory = os.path.dirname(current_directory)
@@ -16,14 +16,14 @@ file_path = os.path.join(model_directory, f'{model}.json')
16
  xgb_ou = xgb.Booster()
17
  xgb_ou.load_model(file_path)
18
 
19
- file_path = os.path.join(pickle_directory, 'test_games_OU.pkl')
20
  with open(file_path,'rb') as f:
21
  test_games = pkl.load(f).tolist()
22
 
23
  file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
24
- gbg_and_odds = pd.read_csv(file_path, index_col=0)
25
  test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)]
26
- test_data_matrix = xgb.DMatrix(test_data.drop(columns=['game_id','Over','Home-Team-Win','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings']).astype(float).values)
27
 
28
  predicted_probas = xgb_ou.predict(test_data_matrix)
29
  predictions = np.argmax(predicted_probas, axis=1)
@@ -31,7 +31,7 @@ test_data['predicted_proba'] = [i[1] for i in predicted_probas]
31
  test_data['prediction'] = (test_data['predicted_proba']>0.5).astype(int)
32
  test_data['correct'] = test_data['Over']==test_data['prediction']
33
 
34
- bets = test_data#.loc[(test_data['predicted_proba']>0.6) | (test_data['predicted_proba']<0.4)]
35
  bets['winnings'] = [0.91 if c else -1 for c in bets[['correct']].values]
36
 
37
  import matplotlib.pyplot as plt
 
4
  import numpy as np
5
  import os
6
 
7
+ model = 'xgboost_OU_no_odds_60.8%'
8
 
9
  current_directory = os.path.dirname(os.path.abspath(__file__))
10
  parent_directory = os.path.dirname(current_directory)
 
16
  xgb_ou = xgb.Booster()
17
  xgb_ou.load_model(file_path)
18
 
19
+ file_path = os.path.join(pickle_directory, 'test_games_OU_no_odds.pkl')
20
  with open(file_path,'rb') as f:
21
  test_games = pkl.load(f).tolist()
22
 
23
  file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
24
+ gbg_and_odds = pd.read_csv(file_path)
25
  test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)]
26
+ test_data_matrix = xgb.DMatrix(test_data.drop(columns=['game_id','Over','Home-Team-Win','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings','Away Odds','Home Odds']).astype(float).values)
27
 
28
  predicted_probas = xgb_ou.predict(test_data_matrix)
29
  predictions = np.argmax(predicted_probas, axis=1)
 
31
  test_data['prediction'] = (test_data['predicted_proba']>0.5).astype(int)
32
  test_data['correct'] = test_data['Over']==test_data['prediction']
33
 
34
+ bets = test_data.loc[(test_data['predicted_proba']>0.6) | (test_data['predicted_proba']<0.4)]
35
  bets['winnings'] = [0.91 if c else -1 for c in bets[['correct']].values]
36
 
37
  import matplotlib.pyplot as plt
Source/Test/xgboost_OU_no_odds_60.8%_dark.png ADDED
Source/Train/xgboost_ML.py CHANGED
@@ -15,10 +15,10 @@ model_directory = os.path.join(parent_directory, 'Models')
15
  pickle_directory = os.path.join(parent_directory, 'Pickles')
16
 
17
  file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
18
- data = pd.read_csv(file_path, index_col=0).dropna()
19
 
20
  margin = data['Home-Team-Win']
21
- data.drop(columns=['Home-Team-Win','Over','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings'], inplace=True)
22
 
23
  acc_results = []
24
 
@@ -55,15 +55,15 @@ for x in tqdm(range(100)):
55
 
56
  # only save results if they are the best so far
57
  if acc == max(acc_results):
58
- file_path = os.path.join(pickle_directory, 'train_games_ML.pkl')
59
  with open(file_path,'wb') as f:
60
  pkl.dump(train_games,f)
61
 
62
- file_path = os.path.join(pickle_directory, 'test_games_ML.pkl')
63
  with open(file_path,'wb') as f:
64
  pkl.dump(test_games,f)
65
 
66
- file_path = os.path.join(model_directory, f'xgboost_ML_{acc}%.json')
67
  model.save_model(file_path)
68
 
69
  print('Done')
 
15
  pickle_directory = os.path.join(parent_directory, 'Pickles')
16
 
17
  file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
18
+ data = pd.read_csv(file_path).dropna()
19
 
20
  margin = data['Home-Team-Win']
21
+ data.drop(columns=['Home-Team-Win','Over','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings', 'Home Odds', 'Away Odds'], inplace=True)
22
 
23
  acc_results = []
24
 
 
55
 
56
  # only save results if they are the best so far
57
  if acc == max(acc_results):
58
+ file_path = os.path.join(pickle_directory, 'train_games_ML_no_odds.pkl')
59
  with open(file_path,'wb') as f:
60
  pkl.dump(train_games,f)
61
 
62
+ file_path = os.path.join(pickle_directory, 'test_games_ML_no_odds.pkl')
63
  with open(file_path,'wb') as f:
64
  pkl.dump(test_games,f)
65
 
66
+ file_path = os.path.join(model_directory, f'xgboost_ML_no_odds_{acc}%.json')
67
  model.save_model(file_path)
68
 
69
  print('Done')
Source/Train/xgboost_OU.py CHANGED
@@ -15,10 +15,10 @@ model_directory = os.path.join(parent_directory, 'Models')
15
  pickle_directory = os.path.join(parent_directory, 'Pickles')
16
 
17
  file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
18
- data = pd.read_csv(file_path, index_col=0).dropna()
19
 
20
  OU = data['Over']
21
- data.drop(columns=['Home-Team-Win','Over','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings'], inplace=True)
22
 
23
  acc_results = []
24
 
@@ -56,15 +56,15 @@ for x in tqdm(range(100)):
56
 
57
  # only save results if they are the best so far
58
  if acc == max(acc_results):
59
- file_path = os.path.join(pickle_directory, 'train_games_OU.pkl')
60
  with open(file_path,'wb') as f:
61
  pkl.dump(train_games,f)
62
 
63
- file_path = os.path.join(pickle_directory, 'test_games_OU.pkl')
64
  with open(file_path,'wb') as f:
65
  pkl.dump(test_games,f)
66
 
67
- file_path = os.path.join(model_directory, f'xgboost_OU_{acc}%.json')
68
  model.save_model(file_path)
69
 
70
  print('Done')
 
15
  pickle_directory = os.path.join(parent_directory, 'Pickles')
16
 
17
  file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
18
+ data = pd.read_csv(file_path).dropna()
19
 
20
  OU = data['Over']
21
+ data.drop(columns=['Home-Team-Win','Over','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings','Away Odds','Home Odds'], inplace=True)
22
 
23
  acc_results = []
24
 
 
56
 
57
  # only save results if they are the best so far
58
  if acc == max(acc_results):
59
+ file_path = os.path.join(pickle_directory, 'train_games_OU_no_odds.pkl')
60
  with open(file_path,'wb') as f:
61
  pkl.dump(train_games,f)
62
 
63
+ file_path = os.path.join(pickle_directory, 'test_games_OU_no_odds.pkl')
64
  with open(file_path,'wb') as f:
65
  pkl.dump(test_games,f)
66
 
67
+ file_path = os.path.join(model_directory, f'xgboost_OU_no_odds_{acc}%.json')
68
  model.save_model(file_path)
69
 
70
  print('Done')
Static/xgboost_ML_75.4%.png DELETED
Binary file (35.5 kB)
 
Static/xgboost_ML_no_odds_69.8%_dark.png ADDED
Static/xgboost_OU_59.3%.png DELETED
Binary file (34 kB)
 
Static/xgboost_OU_no_odds_60.8%_dark.png ADDED
Templates/index.html CHANGED
@@ -222,9 +222,9 @@
222
  <div class="section-container">
223
  <div class="section">
224
  <h3>Moneyline</h3>
225
- <div class="info"></h3><span class="label">Test Accuracy:</span> 75.4%<br></div>
226
  <div class="content">
227
- <img src="/Static/xgboost_ML_75.4%25_dark.png" alt="Moneyline Model">
228
  <div class="info">
229
  <span class="label">Model:</span> XGBoost<br>
230
  <span class="label">Train/Test Split:</span> 1782/199<br>
@@ -237,8 +237,8 @@
237
  <div class="section">
238
  <h3>Over/Under</h3>
239
  <div class="content">
240
- <div class="info"></h3><span class="label">Test Accuracy:</span> 59.3%<br></div>
241
- <img src="/Static/xgboost_OU_59.3%25_dark.png" alt="Over/Under Model">
242
  <div class="info">
243
  <span class="label">Model:</span> XGBoost<br>
244
  <span class="label">Train/Test Split:</span> 1782/199<br>
 
222
  <div class="section-container">
223
  <div class="section">
224
  <h3>Moneyline</h3>
225
+ <div class="info"></h3><span class="label">Test Accuracy:</span> 69.8%<br></div>
226
  <div class="content">
227
+ <img src="/Static/xgboost_ML_no_odds_69.8%25_dark.png" alt="Moneyline Model">
228
  <div class="info">
229
  <span class="label">Model:</span> XGBoost<br>
230
  <span class="label">Train/Test Split:</span> 1782/199<br>
 
237
  <div class="section">
238
  <h3>Over/Under</h3>
239
  <div class="content">
240
+ <div class="info"></h3><span class="label">Test Accuracy:</span> 60.8%<br></div>
241
+ <img src="/Static/xgboost_OU_no_odds_60.8%25_dark.png" alt="Over/Under Model">
242
  <div class="info">
243
  <span class="label">Model:</span> XGBoost<br>
244
  <span class="label">Train/Test Split:</span> 1782/199<br>
main.py CHANGED
@@ -1,16 +1,18 @@
1
  from Source.Predict import predict
2
  from flask import Flask, render_template, jsonify, request
3
  import requests
 
4
  import pandas as pd
5
  import numpy as np
6
  pd.set_option('display.max_columns', None)
7
  pd.set_option('display.expand_frame_repr', False)
8
 
9
- # update past picks
10
  try:
11
- predict.update_past_predictions()
12
- except KeyError as e:
13
- print("Couldn't update past predictions.")
 
14
 
15
  # get week, season
16
  week, season = predict.get_week()
@@ -41,16 +43,22 @@ def submit_games():
41
  moneylines = []
42
  over_unders = []
43
  for row_index,home,away,total in zip(row_indices,home_teams,away_teams,ou_lines):
44
- moneyline, over_under = predict.predict(home,away,season,week,total)
45
  moneyline['rowIndex'] = int(row_index)
46
  over_under['rowIndex'] = int(row_index)
47
  moneylines.append(moneyline)
48
  over_unders.append(over_under)
 
 
49
 
50
  print('MoneyLines')
51
  print(moneylines)
52
  print('OverUnders')
53
  print(over_unders)
 
 
 
 
54
  return jsonify({'moneylines': moneylines,
55
  'over_unders': over_unders})
56
 
 
1
  from Source.Predict import predict
2
  from flask import Flask, render_template, jsonify, request
3
  import requests
4
+ import pickle as pkl
5
  import pandas as pd
6
  import numpy as np
7
  pd.set_option('display.max_columns', None)
8
  pd.set_option('display.expand_frame_repr', False)
9
 
10
+ # load past picks
11
  try:
12
+ with open('predictions_this_year.pkl', 'rb') as f:
13
+ predictions_this_year = pkl.load(f)
14
+ except:
15
+ predictions_this_year = {}
16
 
17
  # get week, season
18
  week, season = predict.get_week()
 
43
  moneylines = []
44
  over_unders = []
45
  for row_index,home,away,total in zip(row_indices,home_teams,away_teams,ou_lines):
46
+ game_id, moneyline, over_under = predict.predict(home,away,season,week,total)
47
  moneyline['rowIndex'] = int(row_index)
48
  over_under['rowIndex'] = int(row_index)
49
  moneylines.append(moneyline)
50
  over_unders.append(over_under)
51
+ predictions_this_year[game_id] = {'Moneyline':moneyline,
52
+ 'Over/Under':over_under}
53
 
54
  print('MoneyLines')
55
  print(moneylines)
56
  print('OverUnders')
57
  print(over_unders)
58
+
59
+ #with open('predictions_this_year.pkl', 'wb') as f:
60
+ # pkl.dump(predictions_this_year, f)
61
+
62
  return jsonify({'moneylines': moneylines,
63
  'over_unders': over_unders})
64
 
update_data.bat ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ python "C:\Users\Brayden\OneDrive - stern.nyu.edu\Brayden Moore LLC\Python\Projects\MARCI 3.0\MARCI-NFL-Betting\Source\Build\update.py"
2
+ cd "C:\Users\Brayden\OneDrive - stern.nyu.edu\Brayden Moore LLC\Python\Projects\MARCI 3.0\MARCI-NFL-Betting"
3
+ git add "Source\Data\gbg_and_odds_this_year.csv"
4
+ git add "Source\Data\gbg_this_year.csv"
5
+ git commit -m "Update with 2023 data"
6
+ git push
7
+ pause