Spaces:

BraydenMoore
/

marci

Running

App Files Files Community

BraydenMoore committed on Sep 10, 2023

Commit

7727a49

•

1 Parent(s): c34b18c

Fix build methods

Browse files

Files changed (26) hide show

Source/Build/__pycache__/build.cpython-311.pyc +0 -0
Source/Build/build.py +49 -58
Source/Build/update.py +3 -6
Source/Data/gbg.csv +2 -2
Source/Data/gbg_and_odds.csv +2 -2
Source/Models/xgboost_ML_no_odds_69.8%.json +0 -0
Source/Models/xgboost_OU_no_odds_60.8%.json +0 -0
Source/{Data/pbp_this_year.csv → Pickles/test_games_ML_no_odds.pkl} +2 -2
Source/{Data/pbp.csv → Pickles/test_games_OU_no_odds.pkl} +2 -2
Source/Pickles/train_games_ML_no_odds.pkl +3 -0
Source/Pickles/train_games_OU_no_odds.pkl +3 -0
Source/Predict/__pycache__/predict.cpython-311.pyc +0 -0
Source/Predict/predict.py +23 -110
Source/Test/xgboost_ML.py +5 -5
Source/Test/xgboost_ML_no_odds_69.8%_dark.png +0 -0
Source/Test/xgboost_OU.py +5 -5
Source/Test/xgboost_OU_no_odds_60.8%_dark.png +0 -0
Source/Train/xgboost_ML.py +5 -5
Source/Train/xgboost_OU.py +5 -5
Static/xgboost_ML_75.4%.png +0 -0
Static/xgboost_ML_no_odds_69.8%_dark.png +0 -0
Static/xgboost_OU_59.3%.png +0 -0
Static/xgboost_OU_no_odds_60.8%_dark.png +0 -0
Templates/index.html +4 -4
main.py +13 -5
update_data.bat +7 -0

Source/Build/__pycache__/build.cpython-311.pyc CHANGED Viewed

Binary files a/Source/Build/__pycache__/build.cpython-311.pyc and b/Source/Build/__pycache__/build.cpython-311.pyc differ

Source/Build/build.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import nfl_data_py.nfl_data_py as nfl
 from tqdm import tqdm
 import numpy as np
 import pandas as pd
@@ -11,7 +11,11 @@ current_directory = os.path.dirname(os.path.abspath(__file__))
 parent_directory = os.path.dirname(current_directory)
 data_directory = os.path.join(parent_directory, 'Data')
-def get_pbp_data(get_seasons=[], overwrite_seasons=[]):
     """
     Pull data from nflFastR's Github repo.
     If you choose to overwrite, it will replace the existing pbp data with the data you pull.
@@ -19,44 +23,19 @@ def get_pbp_data(get_seasons=[], overwrite_seasons=[]):
     """
     pbp = nfl.import_pbp_data(get_seasons)
     pbp['TOP_seconds'] = pbp['drive_time_of_possession'].apply(lambda x: int(x.split(':')[0]) * 60 + int(x.split(':')[1]) if pd.notnull(x) else 0)
-    if overwrite_seasons:
-        file_path = os.path.join(data_directory, 'pbp.csv')
-        old = pd.read_csv(file_path, index_col=0, low_memory=False)
-        old = old.loc[~old['season'].isin(overwrite_seasons)]
-        pbp = pd.concat([old,pbp])
-        pbp.to_csv(file_path)
-        year = dt.datetime.now().year
-        month = dt.datetime.now().month
-        season = year if month in [8,9,10,11,12] else year-1
-        pbp_this_year = pbp.loc[pbp['season']==season]
-        file_path = os.path.join(data_directory, 'pbp_this_year.csv')
-        pbp_this_year.to_csv(file_path)
     return pbp
-def build_gbg_data(get_seasons=[], overwrite_seasons=[]):
     """
-    Using pbp.csv, build a game-by-game dataset to use for prediction models.
-    Populate update_seasons with the current year to only update this season's data while preserving historical data.
     """
     print('Loading play-by-play data.')
-    if overwrite_seasons:
-        print('Overwriting data for', overwrite_seasons)
-        pbp = get_pbp_data(get_seasons, overwrite_seasons)
-    if not overwrite_seasons:
-        file_path = os.path.join(data_directory, 'pbp.csv')
-        pbp = pd.read_csv(file_path, index_col=0)
-    pbp = pbp.loc[pbp['season'].isin(get_seasons)]
     game_date_dict = dict(pbp[['game_id','game_date']].values)
     teams = list(set(list(pbp['home_team'].unique()) + list(pbp['away_team'].unique())))
-    print(teams)
     seasons = pbp['season'].unique()
     print('Building game-by-game data.')
@@ -134,9 +113,14 @@ def build_gbg_data(get_seasons=[], overwrite_seasons=[]):
             game = team.groupby('game_id').agg(features).reset_index()
             game[['W','L']] = game[['W','L']].expanding().sum()
             game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
-            game[game.columns[1:]] = game[game.columns[1:]].shift()
-            game['TEAM'] = team_name
-            game['Season'] = season
             data = pd.concat([data,game])
@@ -149,25 +133,20 @@ def build_gbg_data(get_seasons=[], overwrite_seasons=[]):
     gbg.drop(columns=['TEAM','TEAM.Away','home_team.Away','away_team.Away','Season.Away','game_id.Away'], inplace=True)
     gbg['game_date'] = gbg['game_id'].map(game_date_dict)
-    if overwrite_seasons:
-        file_path = os.path.join(data_directory, 'gbg.csv')
-        old = pd.read_csv(file_path, index_col=0, low_memory=False)
-        old = old.loc[~old['Season'].isin(overwrite_seasons)]
-        gbg = pd.concat([old,gbg])
-        file_path = os.path.join(data_directory, 'gbg.csv')
-        gbg.to_csv(file_path)
-        year = dt.datetime.now().year
-        month = dt.datetime.now().month
-        season = year if month in [8,9,10,11,12] else year-1
-        gbg_this_year = gbg.loc[gbg['Season']==season]
         file_path = os.path.join(data_directory, 'gbg_this_year.csv')
-        gbg_this_year.to_csv(file_path)
-    return gbg
-def add_odds_data(gbg, overwrite=False):
     """
     Get odds from Australian Sports Betting's free online dataset and merge it with game-by-game data.
@@ -192,15 +171,27 @@ def add_odds_data(gbg, overwrite=False):
     odds['Home Winnings'] = [ho-1 if h>a else -1 if a>h else 0 for ho,h,a in odds[['Home Odds Close','Home Score','Away Score']].values]
     odds['Away Winnings'] = [ao-1 if a>h else -1 if h>a else 0 for ao,h,a in odds[['Away Odds Close','Home Score','Away Score']].values]
-    # merge with gbg
-    gbg['Key'] = gbg['game_date'].astype(str) + gbg['home_team'] + gbg['away_team']
-    gbg_and_odds = gbg.merge(odds, left_on='Key', right_on='Key')
-    gbg_and_odds['Home-Team-Win'] = (gbg_and_odds['Home Score']>gbg_and_odds['Away Score']).astype(int)
-    gbg_and_odds['Over'] = ((gbg_and_odds['Home Score'] + gbg_and_odds['Away Score'])>gbg_and_odds['Total Score Close']).astype(int)
-    if overwrite:
-        file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
-        gbg_and_odds.to_csv(file_path)
-    return gbg_and_odds

+from nfl_data_py import nfl_data_py as nfl
 from tqdm import tqdm
 import numpy as np
 import pandas as pd
 parent_directory = os.path.dirname(current_directory)
 data_directory = os.path.join(parent_directory, 'Data')
+year = dt.datetime.now().year
+month = dt.datetime.now().month
+current_season = year if month in [8,9,10,11,12] else year-1
+def get_pbp_data(get_seasons=[]):
     """
     Pull data from nflFastR's Github repo.
     If you choose to overwrite, it will replace the existing pbp data with the data you pull.
     """
     pbp = nfl.import_pbp_data(get_seasons)
     pbp['TOP_seconds'] = pbp['drive_time_of_possession'].apply(lambda x: int(x.split(':')[0]) * 60 + int(x.split(':')[1]) if pd.notnull(x) else 0)
     return pbp
+def build_gbg_data(get_seasons=[]):
     """
+    Build a game-by-game dataset to use for prediction models.
     """
     print('Loading play-by-play data.')
+    pbp = get_pbp_data(get_seasons)
     game_date_dict = dict(pbp[['game_id','game_date']].values)
     teams = list(set(list(pbp['home_team'].unique()) + list(pbp['away_team'].unique())))
     seasons = pbp['season'].unique()
     print('Building game-by-game data.')
             game = team.groupby('game_id').agg(features).reset_index()
             game[['W','L']] = game[['W','L']].expanding().sum()
             game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
+            if season != current_season:
+                game[game.columns[1:]] = game[game.columns[1:]].shift()
+                game['TEAM'] = team_name
+                game['Season'] = season
+            else:
+                game['TEAM'] = team_name
+                game['Season'] = season
             data = pd.concat([data,game])
     gbg.drop(columns=['TEAM','TEAM.Away','home_team.Away','away_team.Away','Season.Away','game_id.Away'], inplace=True)
     gbg['game_date'] = gbg['game_id'].map(game_date_dict)
+    # save current data
+    if current_season in get_seasons:
+        gbg_this_year = gbg.loc[gbg['Season']==current_season]
         file_path = os.path.join(data_directory, 'gbg_this_year.csv')
+        gbg_this_year.to_csv(file_path, index=False)
+    # save historical data
+    if get_seasons != [current_season]:
+        gbg = gbg.loc[gbg['Season']!=current_season]
+        file_path = os.path.join(data_directory, 'gbg.csv')
+        gbg.to_csv(file_path, index=False)
+def add_odds_data():
     """
     Get odds from Australian Sports Betting's free online dataset and merge it with game-by-game data.
     odds['Home Winnings'] = [ho-1 if h>a else -1 if a>h else 0 for ho,h,a in odds[['Home Odds Close','Home Score','Away Score']].values]
     odds['Away Winnings'] = [ao-1 if a>h else -1 if h>a else 0 for ao,h,a in odds[['Away Odds Close','Home Score','Away Score']].values]
+    # load gbg data
+    file_path = os.path.join(data_directory, 'gbg.csv')
+    gbg = pd.read_csv(file_path)
+    file_path = os.path.join(data_directory, 'gbg_this_year.csv')
+    gbg_this_year = pd.read_csv(file_path)
+    # merge and save
+    dataframes = [gbg, gbg_this_year]
+    for idx in range(2):
+        i = dataframes[idx]
+        i['Key'] = i['game_date'].astype(str) + i['home_team'] + i['away_team']
+        gbg_and_odds = i.merge(odds, left_on='Key', right_on='Key')
+        gbg_and_odds['Home-Team-Win'] = (gbg_and_odds['Home Score']>gbg_and_odds['Away Score']).astype(int)
+        gbg_and_odds['Over'] = ((gbg_and_odds['Home Score'] + gbg_and_odds['Away Score'])>gbg_and_odds['Total Score Close']).astype(int)
+        if idx==0:
+            file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
+        else:
+            file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
+        gbg_and_odds.to_csv(file_path, index=False)

Source/Build/update.py CHANGED Viewed

@@ -14,12 +14,9 @@ data_directory = os.path.join(parent_directory, 'Data')
 # get current season
 year = dt.datetime.now().year
 month = dt.datetime.now().month
-season = year if month in [8,9,10,11,12] else year-1
 # update current season
-gbg = build.build_gbg_data(get_seasons=[2023], overwrite_seasons=[2023])
-gbg_and_odds = build.add_odds_data(gbg)
-gbg_and_odds_this_year = gbg_and_odds.loc[gbg_and_odds['Season']==season]
-file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
-gbg_and_odds_this_year.to_csv(file_path)

 # get current season
 year = dt.datetime.now().year
 month = dt.datetime.now().month
+current_season = year if month in [8,9,10,11,12] else year-1
 # update current season
+build.build_gbg_data(get_seasons=[current_season])
+build.add_odds_data()

Source/Data/gbg.csv CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:518ee58f264900f457b6ab0deed9a664607c16bf399fa2a669fc484244c57a92
-size 1792121

 version https://git-lfs.github.com/spec/v1
+oid sha256:87ac3980c207a257b4a0d006502ce2a79ea40e72107b8ab76f9f6114209bc6ff
+size 1613287

Source/Data/gbg_and_odds.csv CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8ec2d7b26b490e1c28de9f9c40b4b4991f6f1ff7bbad0f3e994a7c5c375affe
-size 1567692

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb55eaa75e12c6cffdb65601f2a48b5c09850dd5b42daa0415a2ce9896417eb6
+size 1558685

Source/Models/xgboost_ML_no_odds_69.8%.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Source/Models/xgboost_OU_no_odds_60.8%.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Source/{Data/pbp_this_year.csv → Pickles/test_games_ML_no_odds.pkl} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f2e83db244f5d35254e993e424c95307bd148dc7d7676a51b9d02d166219bf4
-size 442403

 version https://git-lfs.github.com/spec/v1
+oid sha256:21a7279e8e37177aa8ab9ef8c19b9e3ad24a804be91708e9d01ffe2a44e16c7e
+size 7361

Source/{Data/pbp.csv → Pickles/test_games_OU_no_odds.pkl} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:840929401e41f90255f27bb2002791d75ea1aaeee538d586743044fb5065ca96
-size 247394694

 version https://git-lfs.github.com/spec/v1
+oid sha256:d4a8c3ab6053d07713983136f10908961726a1c2039bde108c1537b96c980cf5
+size 7347

Source/Pickles/train_games_ML_no_odds.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:58f338cc969ad0cdc5baf333e56eab4bc0481acc7f07bee2bb0786442eca2617
+size 60512

Source/Pickles/train_games_OU_no_odds.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b867b522859680b70670d58d2cb55ca3641b1bcf53bb04f15d9604810a4c7c37
+size 60526

Source/Predict/__pycache__/predict.cpython-311.pyc CHANGED Viewed

Binary files a/Source/Predict/__pycache__/predict.cpython-311.pyc and b/Source/Predict/__pycache__/predict.cpython-311.pyc differ

Source/Predict/predict.py CHANGED Viewed

@@ -12,8 +12,8 @@ data_directory = os.path.join(parent_directory, 'Data')
 model_directory = os.path.join(parent_directory, 'Models')
 pickle_directory = os.path.join(parent_directory, 'Pickles')
-file_path = os.path.join(data_directory, 'pbp_this_year.csv')
-pbp = pd.read_csv(file_path, index_col=0, low_memory=False)
 # get team abbreviations
 file_path = os.path.join(pickle_directory, 'team_name_to_abbreviation.pkl')
@@ -57,101 +57,33 @@ def get_games():
     return df[['Away Team','Home Team','Date']]
-def get_one_week(team_name,season,week):
-    # create columns
-    team = pbp.loc[((pbp['home_team']==team_name) | (pbp['away_team']==team_name)) & (pbp['season']==season)]
-    team['GP'] = team['week']
-    team['W'] = [1 if r>0 and team_name==h else 1 if r<0 and team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
-    team['L'] = [0 if r>0 and team_name==h else 0 if r<0 and team_name==a else 1 for r,a,h in team[['result','away_team','home_team']].values]
-    team['W_PCT'] = team['W']/team['GP']
-    team['TOP'] = [t if team_name==p else 0 for t,p in team[['TOP_seconds','posteam']].values]
-    team['FGA'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','field_goal_attempt']].values]
-    team['FGM'] = [1 if team_name==p and f=='made' else 0 for p,f in team[['posteam','field_goal_result']].values]
-    team['FG_PCT'] = team['FGM']/team['FGA']
-    team['PassTD'] = np.where((team['posteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
-    team['RushTD'] = np.where((team['posteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
-    team['PassTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
-    team['RushTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
-    team['PassYds'] = [y if p==team_name else 0 for p,y in team[['posteam','passing_yards']].values]
-    team['RushYds'] = [y if p==team_name else 0 for p,y in team[['posteam','rushing_yards']].values]
-    team['PassYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','passing_yards']].values]
-    team['RushYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','rushing_yards']].values]
-    team['Fum'] = np.where((team['defteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
-    team['Fum_Allowed'] = np.where((team['posteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
-    team['INT'] = np.where((team['defteam'] == team_name) & (team['interception'] == 1), 1, 0)
-    team['INT_Allowed'] = np.where((team['posteam'] == team_name) & (team['interception'] == 1), 1, 0)
-    team['Sacks'] = np.where((team['defteam'] == team_name) & (team['sack'] == 1), 1, 0)
-    team['Sacks_Allowed'] = np.where((team['posteam'] == team_name) & (team['sack'] == 1), 1, 0)
-    team['Penalties'] = np.where((team['penalty_team'] == team_name), 1, 0)
-    team['FirstDowns'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','first_down']].values]
-    team['3rdDownConverted'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_converted']].values]
-    team['3rdDownFailed'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_failed']].values]
-    team['3rdDownAllowed'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_converted']].values]
-    team['3rdDownDefended'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_failed']].values]
-    team['PTS'] = [ap if at==team_name else hp if ht==team_name else None for ht,at,hp,ap in team[['home_team','away_team','home_score','away_score']].values]
-    team['PointDiff'] = [r if team_name==h else -r if team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
-    # aggregate from play-by-play to game-by-game
-    features = {
-        'GP':'mean',
-        'W':'mean',
-        'L':'mean',
-        'W_PCT':'mean',
-        'TOP':'sum',
-        'FGA':'sum',
-        'FGM':'sum',
-        'FG_PCT':'mean',
-        'PassTD':'sum',
-        'RushTD':'sum',
-        'PassTD_Allowed':'sum',
-        'RushTD_Allowed':'sum',
-        'PassYds':'sum',
-        'RushYds':'sum',
-        'PassYds_Allowed':'sum',
-        'RushYds_Allowed':'sum',
-        'Fum':'sum',
-        'Fum_Allowed':'sum',
-        'INT':'sum',
-        'INT_Allowed':'sum',
-        'Sacks':'sum',
-        'Sacks_Allowed':'sum',
-        'Penalties':'sum',
-        'FirstDowns':'sum',
-        '3rdDownConverted':'sum',
-        '3rdDownFailed':'sum',
-        '3rdDownAllowed':'sum',
-        '3rdDownDefended':'sum',
-        'PTS':'mean',
-        'PointDiff':'mean'
-    }
-    game = team.groupby('game_id').agg(features).reset_index()
-    game[['W','L']] = game[['W','L']].expanding().sum()
-    game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
-    game['TEAM'] = team_name
-    game['Season'] = season
-    return game.loc[game['GP']==week-1]
-def get_one_week_home_and_away(home,away,season,week):
-    home = get_one_week(home,season,week)
-    away = get_one_week(away,season,week)
-    away.columns = [f'{i}.Away' for i in away.columns]
-    gbg = home.merge(away,left_index=True,right_index=True)
-    gbg.drop(columns=['TEAM','TEAM.Away','Season.Away','game_id.Away'], inplace=True)
-    return gbg.fillna(0)
 def predict(home,away,season,week,total):
     # finish preparing data
     home_abbrev = team_name_to_abbreviation[home]
     away_abbrev = team_name_to_abbreviation[away]
-    gbg = get_one_week_home_and_away(home_abbrev,away_abbrev,season,week)
-    gbg['Total Score Close'] = total
-    print(gbg)
-    matrix = xgb.DMatrix(gbg.drop(columns=['game_id','Season']).astype(float).values)
     # moneyline
-    model = 'xgboost_ML_75.4%'
     file_path = os.path.join(model_directory, f'{model}.json')
     xgb_ml = xgb.Booster()
     xgb_ml.load_model(file_path)
@@ -166,7 +98,7 @@ def predict(home,away,season,week,total):
                      'Probabilities':['N/A']}
     # over/under
-    model = 'xgboost_OU_59.3%'
     file_path = os.path.join(model_directory, f'{model}.json')
     xgb_ou = xgb.Booster()
     xgb_ou.load_model(file_path)
@@ -179,25 +111,6 @@ def predict(home,away,season,week,total):
         over_under = {'Over/Under': 'N/A',
                       'Probability': ['N/A']}
-    return moneyline, over_under
-def update_past_predictions():
-    file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
-    gbg_and_odds_this_year = pd.read_csv(file_path, index_col=0, low_memory=False)
-    total_dict = dict(gbg_and_odds_this_year[['game_id','Total Score Close']])
-    games = pbp.drop_duplicates(subset='game_id')
-    predictions = {}
-    for _, i in games.iterrows():
-        game_id = i['game_id']
-        home = i['home_team']
-        away = i['away_team']
-        week = i['week']
-        season = i['season']
-        total = total_dict[game_id]
-        predictions[game_id] = predict(home,away,season,week,total)
-    predictions_df = pd.DataFrame(predictions)
-    file_path = os.path.join(data_directory, 'predictions_this_year.csv')
-    predictions_df.to_csv(file_path)

 model_directory = os.path.join(parent_directory, 'Models')
 pickle_directory = os.path.join(parent_directory, 'Pickles')
+file_path = os.path.join(data_directory, 'gbg_this_year.csv')
+gbg = pd.read_csv(file_path, low_memory=False)
 # get team abbreviations
 file_path = os.path.join(pickle_directory, 'team_name_to_abbreviation.pkl')
     return df[['Away Team','Home Team','Date']]
+def get_one_week(home,away,season,week):
+    try:
+        home_df = gbg.loc[((gbg['away_team']==home) | (gbg['home_team']==home)) & (gbg['Season']==season) & (gbg['GP']==week-1)]
+        home_df = home_df[[i for i in home_df.columns if '.Away' not in i] if home_df['home_team'].item()==home else [i for i in home_df.columns if '.Away' in i]]
+        home_df.columns = [i.replace('.Away','') for i in home_df.columns]
+        away_df = gbg.loc[((gbg['away_team']==away) | (gbg['home_team']==away)) & (gbg['Season']==season) & (gbg['GP']==week-1)]
+        away_df = away_df[[i for i in away_df.columns if '.Away' not in i] if away_df['home_team'].item()==away else [i for i in away_df.columns if '.Away' in i]]
+        away_df.columns = [i.replace('.Away','') + '.Away' for i in away_df.columns]
+        drop_columns = ['game_id', 'Season', 'home_team', 'away_team', 'game_date']
+        df = home_df.merge(away_df, left_on='GP', right_on='GP.Away').drop(columns=drop_columns)
+        return df
+    except ValueError:
+        return pd.DataFrame()
 def predict(home,away,season,week,total):
     # finish preparing data
     home_abbrev = team_name_to_abbreviation[home]
     away_abbrev = team_name_to_abbreviation[away]
+    data = get_one_week(home_abbrev,away_abbrev,season,week)
+    data['Total Score Close'] = total
+    matrix = xgb.DMatrix(data.astype(float).values)
     # moneyline
+    model = 'xgboost_ML_no_odds_69.8%'
     file_path = os.path.join(model_directory, f'{model}.json')
     xgb_ml = xgb.Booster()
     xgb_ml.load_model(file_path)
                      'Probabilities':['N/A']}
     # over/under
+    model = 'xgboost_OU_no_odds_60.8%'
     file_path = os.path.join(model_directory, f'{model}.json')
     xgb_ou = xgb.Booster()
     xgb_ou.load_model(file_path)
         over_under = {'Over/Under': 'N/A',
                       'Probability': ['N/A']}
+    # create game id to save predictions
+    game_id = str(season) + '_' + str(week) + '_' + away + '_' + home
+    return game_id, moneyline, over_under

Source/Test/xgboost_ML.py CHANGED Viewed

@@ -4,7 +4,7 @@ import pickle as pkl
 import numpy as np
 import os
-model = 'xgboost_ML_75.4%'
 current_directory = os.path.dirname(os.path.abspath(__file__))
 parent_directory = os.path.dirname(current_directory)
@@ -16,14 +16,14 @@ file_path = os.path.join(model_directory, f'{model}.json')
 xgb_ml = xgb.Booster()
 xgb_ml.load_model(file_path)
-file_path = os.path.join(pickle_directory, 'test_games_ML.pkl')
 with open(file_path,'rb') as f:
     test_games = pkl.load(f).tolist()
 file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
-gbg_and_odds = pd.read_csv(file_path, index_col=0)
 test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)]
-test_data_matrix = xgb.DMatrix(test_data.drop(columns=['game_id','Over','Home-Team-Win','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings']).astype(float).values)
 predicted_probas = xgb_ml.predict(test_data_matrix)
 predictions = np.argmax(predicted_probas, axis=1)
@@ -32,7 +32,7 @@ test_data['prediction'] = (test_data['predicted_proba']>0.5).astype(int)
 test_data['correct'] = test_data['Home-Team-Win']==test_data['prediction']
 bets = test_data.loc[(test_data['predicted_proba']>0.6) | (test_data['predicted_proba']<0.4)]
-bets['winnings'] = [h if c else a for h,a,c in bets[['Home Winnings','Away Winnings','correct']].values]
 import matplotlib.pyplot as plt
 fig = plt.figure(facecolor='black')

 import numpy as np
 import os
+model = 'xgboost_ML_no_odds_69.8%'
 current_directory = os.path.dirname(os.path.abspath(__file__))
 parent_directory = os.path.dirname(current_directory)
 xgb_ml = xgb.Booster()
 xgb_ml.load_model(file_path)
+file_path = os.path.join(pickle_directory, 'test_games_ML_no_odds.pkl')
 with open(file_path,'rb') as f:
     test_games = pkl.load(f).tolist()
 file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
+gbg_and_odds = pd.read_csv(file_path)
 test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)]
+test_data_matrix = xgb.DMatrix(test_data.drop(columns=['game_id','Over','Home-Team-Win','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings','Away Odds','Home Odds']).astype(float).values)
 predicted_probas = xgb_ml.predict(test_data_matrix)
 predictions = np.argmax(predicted_probas, axis=1)
 test_data['correct'] = test_data['Home-Team-Win']==test_data['prediction']
 bets = test_data.loc[(test_data['predicted_proba']>0.6) | (test_data['predicted_proba']<0.4)]
+bets['winnings'] = [h if p==1 else a for h,a,p in bets[['Home Winnings','Away Winnings','prediction']].values]
 import matplotlib.pyplot as plt
 fig = plt.figure(facecolor='black')

Source/Test/xgboost_ML_no_odds_69.8%_dark.png ADDED Viewed

Source/Test/xgboost_OU.py CHANGED Viewed

@@ -4,7 +4,7 @@ import pickle as pkl
 import numpy as np
 import os
-model = 'xgboost_OU_59.3%'
 current_directory = os.path.dirname(os.path.abspath(__file__))
 parent_directory = os.path.dirname(current_directory)
@@ -16,14 +16,14 @@ file_path = os.path.join(model_directory, f'{model}.json')
 xgb_ou = xgb.Booster()
 xgb_ou.load_model(file_path)
-file_path = os.path.join(pickle_directory, 'test_games_OU.pkl')
 with open(file_path,'rb') as f:
     test_games = pkl.load(f).tolist()
 file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
-gbg_and_odds = pd.read_csv(file_path, index_col=0)
 test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)]
-test_data_matrix = xgb.DMatrix(test_data.drop(columns=['game_id','Over','Home-Team-Win','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings']).astype(float).values)
 predicted_probas = xgb_ou.predict(test_data_matrix)
 predictions = np.argmax(predicted_probas, axis=1)
@@ -31,7 +31,7 @@ test_data['predicted_proba'] = [i[1] for i in predicted_probas]
 test_data['prediction'] = (test_data['predicted_proba']>0.5).astype(int)
 test_data['correct'] = test_data['Over']==test_data['prediction']
-bets = test_data#.loc[(test_data['predicted_proba']>0.6) | (test_data['predicted_proba']<0.4)]
 bets['winnings'] = [0.91 if c else -1 for c in bets[['correct']].values]
 import matplotlib.pyplot as plt

 import numpy as np
 import os
+model = 'xgboost_OU_no_odds_60.8%'
 current_directory = os.path.dirname(os.path.abspath(__file__))
 parent_directory = os.path.dirname(current_directory)
 xgb_ou = xgb.Booster()
 xgb_ou.load_model(file_path)
+file_path = os.path.join(pickle_directory, 'test_games_OU_no_odds.pkl')
 with open(file_path,'rb') as f:
     test_games = pkl.load(f).tolist()
 file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
+gbg_and_odds = pd.read_csv(file_path)
 test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)]
+test_data_matrix = xgb.DMatrix(test_data.drop(columns=['game_id','Over','Home-Team-Win','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings','Away Odds','Home Odds']).astype(float).values)
 predicted_probas = xgb_ou.predict(test_data_matrix)
 predictions = np.argmax(predicted_probas, axis=1)
 test_data['prediction'] = (test_data['predicted_proba']>0.5).astype(int)
 test_data['correct'] = test_data['Over']==test_data['prediction']
+bets = test_data.loc[(test_data['predicted_proba']>0.6) | (test_data['predicted_proba']<0.4)]
 bets['winnings'] = [0.91 if c else -1 for c in bets[['correct']].values]
 import matplotlib.pyplot as plt

Source/Test/xgboost_OU_no_odds_60.8%_dark.png ADDED Viewed

Source/Train/xgboost_ML.py CHANGED Viewed

@@ -15,10 +15,10 @@ model_directory = os.path.join(parent_directory, 'Models')
 pickle_directory = os.path.join(parent_directory, 'Pickles')
 file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
-data = pd.read_csv(file_path, index_col=0).dropna()
 margin = data['Home-Team-Win']
-data.drop(columns=['Home-Team-Win','Over','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings'], inplace=True)
 acc_results = []
@@ -55,15 +55,15 @@ for x in tqdm(range(100)):
     # only save results if they are the best so far
     if acc == max(acc_results):
-        file_path = os.path.join(pickle_directory, 'train_games_ML.pkl')
         with open(file_path,'wb') as f:
             pkl.dump(train_games,f)
-        file_path = os.path.join(pickle_directory, 'test_games_ML.pkl')
         with open(file_path,'wb') as f:
             pkl.dump(test_games,f)
-        file_path = os.path.join(model_directory, f'xgboost_ML_{acc}%.json')
         model.save_model(file_path)
 print('Done')

 pickle_directory = os.path.join(parent_directory, 'Pickles')
 file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
+data = pd.read_csv(file_path).dropna()
 margin = data['Home-Team-Win']
+data.drop(columns=['Home-Team-Win','Over','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings', 'Home Odds', 'Away Odds'], inplace=True)
 acc_results = []
     # only save results if they are the best so far
     if acc == max(acc_results):
+        file_path = os.path.join(pickle_directory, 'train_games_ML_no_odds.pkl')
         with open(file_path,'wb') as f:
             pkl.dump(train_games,f)
+        file_path = os.path.join(pickle_directory, 'test_games_ML_no_odds.pkl')
         with open(file_path,'wb') as f:
             pkl.dump(test_games,f)
+        file_path = os.path.join(model_directory, f'xgboost_ML_no_odds_{acc}%.json')
         model.save_model(file_path)
 print('Done')

Source/Train/xgboost_OU.py CHANGED Viewed

@@ -15,10 +15,10 @@ model_directory = os.path.join(parent_directory, 'Models')
 pickle_directory = os.path.join(parent_directory, 'Pickles')
 file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
-data = pd.read_csv(file_path, index_col=0).dropna()
 OU = data['Over']
-data.drop(columns=['Home-Team-Win','Over','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings'], inplace=True)
 acc_results = []
@@ -56,15 +56,15 @@ for x in tqdm(range(100)):
     # only save results if they are the best so far
     if acc == max(acc_results):
-        file_path = os.path.join(pickle_directory, 'train_games_OU.pkl')
         with open(file_path,'wb') as f:
             pkl.dump(train_games,f)
-        file_path = os.path.join(pickle_directory, 'test_games_OU.pkl')
         with open(file_path,'wb') as f:
             pkl.dump(test_games,f)
-        file_path = os.path.join(model_directory, f'xgboost_OU_{acc}%.json')
         model.save_model(file_path)
 print('Done')

 pickle_directory = os.path.join(parent_directory, 'Pickles')
 file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
+data = pd.read_csv(file_path).dropna()
 OU = data['Over']
+data.drop(columns=['Home-Team-Win','Over','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings','Away Odds','Home Odds'], inplace=True)
 acc_results = []
     # only save results if they are the best so far
     if acc == max(acc_results):
+        file_path = os.path.join(pickle_directory, 'train_games_OU_no_odds.pkl')
         with open(file_path,'wb') as f:
             pkl.dump(train_games,f)
+        file_path = os.path.join(pickle_directory, 'test_games_OU_no_odds.pkl')
         with open(file_path,'wb') as f:
             pkl.dump(test_games,f)
+        file_path = os.path.join(model_directory, f'xgboost_OU_no_odds_{acc}%.json')
         model.save_model(file_path)
 print('Done')

Static/xgboost_ML_75.4%.png DELETED Viewed

Binary file (35.5 kB)

Static/xgboost_ML_no_odds_69.8%_dark.png ADDED Viewed

Static/xgboost_OU_59.3%.png DELETED Viewed

Binary file (34 kB)

Static/xgboost_OU_no_odds_60.8%_dark.png ADDED Viewed

Templates/index.html CHANGED Viewed

@@ -222,9 +222,9 @@
         <div class="section-container">
             <div class="section">
                 <h3>Moneyline</h3>
-                <div class="info"></h3><span class="label">Test Accuracy:</span> 75.4%<br></div>
                 <div class="content">
-                    <img src="/Static/xgboost_ML_75.4%25_dark.png" alt="Moneyline Model">
                     <div class="info">
                         <span class="label">Model:</span> XGBoost<br>
                         <span class="label">Train/Test Split:</span> 1782/199<br>
@@ -237,8 +237,8 @@
             <div class="section">
                 <h3>Over/Under</h3>
                 <div class="content">
-                    <div class="info"></h3><span class="label">Test Accuracy:</span> 59.3%<br></div>
-                    <img src="/Static/xgboost_OU_59.3%25_dark.png" alt="Over/Under Model">
                     <div class="info">
                         <span class="label">Model:</span> XGBoost<br>
                         <span class="label">Train/Test Split:</span> 1782/199<br>

         <div class="section-container">
             <div class="section">
                 <h3>Moneyline</h3>
+                <div class="info"></h3><span class="label">Test Accuracy:</span> 69.8%<br></div>
                 <div class="content">
+                    <img src="/Static/xgboost_ML_no_odds_69.8%25_dark.png" alt="Moneyline Model">
                     <div class="info">
                         <span class="label">Model:</span> XGBoost<br>
                         <span class="label">Train/Test Split:</span> 1782/199<br>
             <div class="section">
                 <h3>Over/Under</h3>
                 <div class="content">
+                    <div class="info"></h3><span class="label">Test Accuracy:</span> 60.8%<br></div>
+                    <img src="/Static/xgboost_OU_no_odds_60.8%25_dark.png" alt="Over/Under Model">
                     <div class="info">
                         <span class="label">Model:</span> XGBoost<br>
                         <span class="label">Train/Test Split:</span> 1782/199<br>

main.py CHANGED Viewed

@@ -1,16 +1,18 @@
 from Source.Predict import predict
 from flask import Flask, render_template, jsonify, request
 import requests
 import pandas as pd
 import numpy as np
 pd.set_option('display.max_columns', None)
 pd.set_option('display.expand_frame_repr', False)
-# update past picks
 try:
-    predict.update_past_predictions()
-except KeyError as e:
-    print("Couldn't update past predictions.")
 # get week, season
 week, season = predict.get_week()
@@ -41,16 +43,22 @@ def submit_games():
     moneylines = []
     over_unders = []
     for row_index,home,away,total in zip(row_indices,home_teams,away_teams,ou_lines):
-        moneyline, over_under = predict.predict(home,away,season,week,total)
         moneyline['rowIndex'] = int(row_index)
         over_under['rowIndex'] = int(row_index)
         moneylines.append(moneyline)
         over_unders.append(over_under)
     print('MoneyLines')
     print(moneylines)
     print('OverUnders')
     print(over_unders)
     return jsonify({'moneylines': moneylines,
                     'over_unders': over_unders})

 from Source.Predict import predict
 from flask import Flask, render_template, jsonify, request
 import requests
+import pickle as pkl
 import pandas as pd
 import numpy as np
 pd.set_option('display.max_columns', None)
 pd.set_option('display.expand_frame_repr', False)
+# load past picks
 try:
+    with open('predictions_this_year.pkl', 'rb') as f:
+        predictions_this_year = pkl.load(f)
+except:
+    predictions_this_year = {}
 # get week, season
 week, season = predict.get_week()
     moneylines = []
     over_unders = []
     for row_index,home,away,total in zip(row_indices,home_teams,away_teams,ou_lines):
+        game_id, moneyline, over_under = predict.predict(home,away,season,week,total)
         moneyline['rowIndex'] = int(row_index)
         over_under['rowIndex'] = int(row_index)
         moneylines.append(moneyline)
         over_unders.append(over_under)
+        predictions_this_year[game_id] = {'Moneyline':moneyline,
+                                          'Over/Under':over_under}
     print('MoneyLines')
     print(moneylines)
     print('OverUnders')
     print(over_unders)
+    #with open('predictions_this_year.pkl', 'wb') as f:
+    #    pkl.dump(predictions_this_year, f)
     return jsonify({'moneylines': moneylines,
                     'over_unders': over_unders})

update_data.bat ADDED Viewed

	@@ -0,0 +1,7 @@

+python "C:\Users\Brayden\OneDrive - stern.nyu.edu\Brayden Moore LLC\Python\Projects\MARCI 3.0\MARCI-NFL-Betting\Source\Build\update.py"
+cd "C:\Users\Brayden\OneDrive - stern.nyu.edu\Brayden Moore LLC\Python\Projects\MARCI 3.0\MARCI-NFL-Betting"
+git add "Source\Data\gbg_and_odds_this_year.csv"
+git add "Source\Data\gbg_this_year.csv"
+git commit -m "Update with 2023 data"
+git push
+pause