Spaces:
Running
Running
BraydenMoore
committed on
Commit
•
3231b63
1
Parent(s):
1beb833
Initial commit
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- Dockerfile.txt +29 -0
- Notebook.ipynb +0 -0
- Source/Build/__pycache__/build.cpython-311.pyc +0 -0
- Source/Build/build.py +206 -0
- Source/Build/nfl_data_py +1 -0
- Source/Build/update.py +25 -0
- Source/Data/gbg.csv +3 -0
- Source/Data/gbg_and_odds.csv +3 -0
- Source/Data/gbg_and_odds_this_year.csv +3 -0
- Source/Data/gbg_this_year.csv +3 -0
- Source/Data/pbp.csv +3 -0
- Source/Data/pbp_this_year.csv +3 -0
- Source/Models/__init__.py +0 -0
- Source/Models/xgboost_ML_75.4%.json +0 -0
- Source/Models/xgboost_OU_59.3%.json +0 -0
- Source/Pickles/team_abbreviation_to_name.pkl +3 -0
- Source/Pickles/team_name_to_abbreviation.pkl +3 -0
- Source/Pickles/test_games_ML.pkl +3 -0
- Source/Pickles/test_games_OU.pkl +3 -0
- Source/Pickles/train_games_ML.pkl +3 -0
- Source/Pickles/train_games_OU.pkl +3 -0
- Source/Predict/__pycache__/predict.cpython-311.pyc +0 -0
- Source/Predict/predict.py +201 -0
- Source/Test/__init__.py +0 -0
- Source/Test/xgboost_ML.py +59 -0
- Source/Test/xgboost_ML_75.4%.png +0 -0
- Source/Test/xgboost_ML_75.4%_dark.png +0 -0
- Source/Test/xgboost_OU.py +59 -0
- Source/Test/xgboost_OU_59.3%.png +0 -0
- Source/Test/xgboost_OU_59.3%_dark.png +0 -0
- Source/Train/xgboost_ML.py +69 -0
- Source/Train/xgboost_OU.py +70 -0
- Static/Arizona Cardinals.webp +0 -0
- Static/Atlanta Falcons.webp +0 -0
- Static/Baltimore Ravens.webp +0 -0
- Static/Buffalo Bills.webp +0 -0
- Static/Carolina Panthers.webp +0 -0
- Static/Chicago Bears.webp +0 -0
- Static/Cincinnati Bengals.webp +0 -0
- Static/Cleveland Browns.webp +0 -0
- Static/Dallas Cowboys.webp +0 -0
- Static/Denver Broncos.webp +0 -0
- Static/Detroit Lions.webp +0 -0
- Static/Green Bay Packers.webp +0 -0
- Static/Houston Texans.webp +0 -0
- Static/Indianapolis Colts.webp +0 -0
- Static/Jacksonville Jaguars.webp +0 -0
- Static/Kansas City Chiefs.webp +0 -0
- Static/Las Vegas Raiders.webp +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.csv filter=lfs diff=lfs merge=lfs -text
|
Dockerfile.txt
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Use the official lightweight Python image.
FROM python:3.11

# Allow statements and log messages to immediately appear in the logs
ENV PYTHONUNBUFFERED True

# Create the non-root user up front so app files can be copied with the right owner.
RUN useradd -m -u 1000 user

# Copy local code to the container image, owned by the non-root user.
# FIX: previously the code was copied twice -- once as root into /app (the copy
# gunicorn actually serves) and once with --chown into $HOME/app (unused).
# A single owned copy into the work directory replaces both.
ENV APP_HOME /app
WORKDIR $APP_HOME
COPY --chown=user . ./

# Install production dependencies.
RUN pip install --no-cache-dir -r requirements.txt

# Switch to the non-root user.
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Run the web service on container startup.
CMD exec gunicorn --bind 0.0.0.0:7860 --workers 9 --threads 16 --timeout 120 main:app
|
29 |
+
|
Notebook.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Source/Build/__pycache__/build.cpython-311.pyc
ADDED
Binary file (20.8 kB). View file
|
|
Source/Build/build.py
ADDED
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import nfl_data_py.nfl_data_py as nfl
|
2 |
+
from tqdm import tqdm
|
3 |
+
import numpy as np
|
4 |
+
import pandas as pd
|
5 |
+
pd.set_option('chained_assignment',None)
|
6 |
+
pd.set_option('display.max_columns',None)
|
7 |
+
import os
|
8 |
+
import datetime as dt
|
9 |
+
|
10 |
+
current_directory = os.path.dirname(os.path.abspath(__file__))
|
11 |
+
parent_directory = os.path.dirname(current_directory)
|
12 |
+
data_directory = os.path.join(parent_directory, 'Data')
|
13 |
+
|
14 |
+
def get_pbp_data(get_seasons=None, overwrite_seasons=None):
    """
    Pull play-by-play data from nflFastR's Github repo.

    Parameters
    ----------
    get_seasons : list[int] | None
        Seasons to download, e.g. [2022, 2023].
    overwrite_seasons : list[int] | None
        Seasons whose rows in Data/pbp.csv should be replaced with the freshly
        downloaded data. Falsy -> the on-disk historical file is left alone.

    Returns
    -------
    pandas.DataFrame
        The downloaded (and, when overwriting, merged) play-by-play data.

    Side effects: may rewrite Data/pbp.csv; always writes Data/pbp_this_year.csv.
    """
    # FIX: avoid mutable default arguments; normalize None to fresh lists.
    get_seasons = [] if get_seasons is None else get_seasons
    overwrite_seasons = [] if overwrite_seasons is None else overwrite_seasons

    pbp = nfl.import_pbp_data(get_seasons)
    # 'drive_time_of_possession' is a 'MM:SS' string; convert to total seconds
    # (0 when missing).
    pbp['TOP_seconds'] = pbp['drive_time_of_possession'].apply(
        lambda x: int(x.split(':')[0]) * 60 + int(x.split(':')[1]) if pd.notnull(x) else 0)

    if overwrite_seasons:
        # Keep every historical season except the ones being refreshed, then
        # append the fresh download and persist.
        file_path = os.path.join(data_directory, 'pbp.csv')
        old = pd.read_csv(file_path, index_col=0, low_memory=False)
        old = old.loc[~old['season'].isin(overwrite_seasons)]
        pbp = pd.concat([old, pbp])
        pbp.to_csv(file_path)

    # NFL season label: Aug-Dec games belong to the current calendar year,
    # Jan-Jul games to the previous year's season.
    now = dt.datetime.now()
    season = now.year if now.month in [8, 9, 10, 11, 12] else now.year - 1
    pbp_this_year = pbp.loc[pbp['season'] == season]
    file_path = os.path.join(data_directory, 'pbp_this_year.csv')
    pbp_this_year.to_csv(file_path)

    return pbp
+
def build_gbg_data(get_seasons=[], overwrite_seasons=[]):
    """
    Using pbp.csv, build a game-by-game dataset to use for prediction models.

    Populate overwrite_seasons with the current year to re-download and update
    only this season's data while preserving historical data; leave it empty to
    build purely from the on-disk pbp.csv.

    Side effects: writes Data/gbg.csv and Data/gbg_this_year.csv.
    Returns the full game-by-game DataFrame.
    """
    print('Loading play-by-play data.')

    if overwrite_seasons:
        # Refresh the requested seasons from nflFastR (also rewrites pbp.csv).
        print('Overwriting data for', overwrite_seasons)
        pbp = get_pbp_data(get_seasons, overwrite_seasons)

    if not overwrite_seasons:
        # No refresh requested: work entirely from the cached file.
        file_path = os.path.join(data_directory, 'pbp.csv')
        pbp = pd.read_csv(file_path, index_col=0)

    pbp = pbp.loc[pbp['season'].isin(get_seasons)]
    # game_id -> game_date, reattached to the merged output at the end.
    game_date_dict = dict(pbp[['game_id','game_date']].values)
    teams = list(set(list(pbp['home_team'].unique()) + list(pbp['away_team'].unique())))
    print(teams)
    seasons = pbp['season'].unique()

    print('Building game-by-game data.')
    data = pd.DataFrame()
    for season in seasons:
        print(season)
        for team_name in tqdm(teams):
            # create features: per-play indicator/value columns from this
            # team's perspective (offense = posteam, defense = defteam)
            team = pbp.loc[((pbp['home_team']==team_name) | (pbp['away_team']==team_name)) & (pbp['season']==season)]
            team['GP'] = team['week']
            team['W'] = [1 if r>0 and team_name==h else 1 if r<0 and team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
            team['L'] = [0 if r>0 and team_name==h else 0 if r<0 and team_name==a else 1 for r,a,h in team[['result','away_team','home_team']].values]
            team['W_PCT'] = team['W']/team['GP']
            team['TOP'] = [t if team_name==p else 0 for t,p in team[['TOP_seconds','posteam']].values]
            team['FGA'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','field_goal_attempt']].values]
            team['FGM'] = [1 if team_name==p and f=='made' else 0 for p,f in team[['posteam','field_goal_result']].values]
            team['FG_PCT'] = team['FGM']/team['FGA']
            team['PassTD'] = np.where((team['posteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
            team['RushTD'] = np.where((team['posteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
            team['PassTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
            team['RushTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
            team['PassYds'] = [y if p==team_name else 0 for p,y in team[['posteam','passing_yards']].values]
            team['RushYds'] = [y if p==team_name else 0 for p,y in team[['posteam','rushing_yards']].values]
            team['PassYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','passing_yards']].values]
            team['RushYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','rushing_yards']].values]
            team['Fum'] = np.where((team['defteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
            team['Fum_Allowed'] = np.where((team['posteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
            team['INT'] = np.where((team['defteam'] == team_name) & (team['interception'] == 1), 1, 0)
            team['INT_Allowed'] = np.where((team['posteam'] == team_name) & (team['interception'] == 1), 1, 0)
            team['Sacks'] = np.where((team['defteam'] == team_name) & (team['sack'] == 1), 1, 0)
            team['Sacks_Allowed'] = np.where((team['posteam'] == team_name) & (team['sack'] == 1), 1, 0)
            team['Penalties'] = np.where((team['penalty_team'] == team_name), 1, 0)
            team['FirstDowns'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','first_down']].values]
            team['3rdDownConverted'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_converted']].values]
            team['3rdDownFailed'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_failed']].values]
            team['3rdDownAllowed'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_converted']].values]
            team['3rdDownDefended'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_failed']].values]
            team['PTS'] = [ap if at==team_name else hp if ht==team_name else None for ht,at,hp,ap in team[['home_team','away_team','home_score','away_score']].values]
            team['PointDiff'] = [r if team_name==h else -r if team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]

            # aggregate from play-by-play to game-by-game
            features = {
                'GP':'mean',
                'W':'mean',
                'L':'mean',
                'W_PCT':'mean',
                'TOP':'sum',
                'FGA':'sum',
                'FGM':'sum',
                'FG_PCT':'mean',
                'PassTD':'sum',
                'RushTD':'sum',
                'PassTD_Allowed':'sum',
                'RushTD_Allowed':'sum',
                'PassYds':'sum',
                'RushYds':'sum',
                'PassYds_Allowed':'sum',
                'RushYds_Allowed':'sum',
                'Fum':'sum',
                'Fum_Allowed':'sum',
                'INT':'sum',
                'INT_Allowed':'sum',
                'Sacks':'sum',
                'Sacks_Allowed':'sum',
                'Penalties':'sum',
                'FirstDowns':'sum',
                '3rdDownConverted':'sum',
                '3rdDownFailed':'sum',
                '3rdDownAllowed':'sum',
                '3rdDownDefended':'sum',
                'PTS':'mean',
                'PointDiff':'mean'
            }

            game = team.groupby('game_id').agg(features).reset_index()
            # Cumulative record and running averages over the season so far.
            game[['W','L']] = game[['W','L']].expanding().sum()
            game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
            # Shift by one game so each row carries only stats from BEFORE that
            # game -- presumably to avoid target leakage when training.
            game[game.columns[1:]] = game[game.columns[1:]].shift()
            game['TEAM'] = team_name
            game['Season'] = season

            data = pd.concat([data,game])

    # separate home and away data and merge: each game becomes one row with
    # home-team features plus away-team features suffixed '.Away'.
    data = data.merge(pbp[['game_id','home_team','away_team']].drop_duplicates())
    home = data.loc[data['home_team']==data['TEAM']]
    away = data.loc[data['away_team']==data['TEAM']]
    away.columns = [f'{i}.Away' for i in away.columns]
    gbg = home.merge(away,left_on='game_id',right_on='game_id.Away')
    gbg.drop(columns=['TEAM','TEAM.Away','home_team.Away','away_team.Away','Season.Away','game_id.Away'], inplace=True)
    gbg['game_date'] = gbg['game_id'].map(game_date_dict)

    if overwrite_seasons:
        # Splice the rebuilt seasons into the historical gbg file.
        file_path = os.path.join(data_directory, 'gbg.csv')
        old = pd.read_csv(file_path, index_col=0, low_memory=False)
        old = old.loc[~old['Season'].isin(overwrite_seasons)]
        gbg = pd.concat([old,gbg])
    file_path = os.path.join(data_directory, 'gbg.csv')
    gbg.to_csv(file_path)

    # Also persist a current-season slice (same season rule as get_pbp_data).
    year = dt.datetime.now().year
    month = dt.datetime.now().month
    season = year if month in [8,9,10,11,12] else year-1
    gbg_this_year = gbg.loc[gbg['Season']==season]
    file_path = os.path.join(data_directory, 'gbg_this_year.csv')
    gbg_this_year.to_csv(file_path)

    return gbg
+
def add_odds_data(gbg, overwrite=False):
    """
    Get odds from Australian Sports Betting's free online dataset and merge it
    with game-by-game data.

    Parameters
    ----------
    gbg : pandas.DataFrame
        Output of build_gbg_data (must have game_date/home_team/away_team).
    overwrite : bool
        When True, also write the merged result to Data/gbg_and_odds.csv.

    Returns the merged DataFrame with betting lines and the two model targets
    ('Home-Team-Win' and 'Over').
    """

    # get team abbreviations (full name -> abbreviation, from nfl_data_py)
    team_descriptions = nfl.import_team_desc()
    team_abbreviation_dict = dict(team_descriptions[['team_name','team_abbr']].values)

    # get odds
    odds = pd.read_excel('https://www.aussportsbetting.com/historical_data/nfl.xlsx')
    # Normalize the franchise's historical names to its current one so the
    # abbreviation lookup works for older seasons.
    odds['Home Team'] = odds['Home Team'].str.replace('Washington Redskins','Washington Commanders').str.replace('Washington Football Team','Washington Commanders')
    odds['Away Team'] = odds['Away Team'].str.replace('Washington Redskins','Washington Commanders').str.replace('Washington Football Team','Washington Commanders')
    # Same Aug-Dec/Jan-Jul season rule used elsewhere in this module.
    odds['Season'] = [i.year if i.month in [8,9,10,11,12] else i.year-1 for i in odds['Date']]
    odds['Home Team Abbrev'] = odds['Home Team'].map(team_abbreviation_dict)
    odds['Away Team Abbrev'] = odds['Away Team'].map(team_abbreviation_dict)
    odds = odds[['Date','Home Score','Away Score','Home Team Abbrev','Away Team Abbrev','Home Odds Close','Away Odds Close','Total Score Close']]
    # Join key: date string + home abbrev + away abbrev (mirrored on gbg below).
    odds['Key'] = odds['Date'].astype(str) + odds['Home Team Abbrev'] + odds['Away Team Abbrev']
    odds = odds.drop(columns=['Date','Home Team Abbrev','Away Team Abbrev']).dropna()
    # Convert decimal odds to American odds.
    odds['Home Odds'] = [round((i-1)*100) if i>= 2 else round(-100/(i-1)) for i in odds['Home Odds Close']]
    odds['Away Odds'] = [round((i-1)*100) if i>= 2 else round(-100/(i-1)) for i in odds['Away Odds Close']]
    # Profit per unit staked on each side (decimal odds minus stake; -1 on a
    # loss; 0 on a tie).
    odds['Home Winnings'] = [ho-1 if h>a else -1 if a>h else 0 for ho,h,a in odds[['Home Odds Close','Home Score','Away Score']].values]
    odds['Away Winnings'] = [ao-1 if a>h else -1 if h>a else 0 for ao,h,a in odds[['Away Odds Close','Home Score','Away Score']].values]

    # merge with gbg
    gbg['Key'] = gbg['game_date'].astype(str) + gbg['home_team'] + gbg['away_team']
    gbg_and_odds = gbg.merge(odds, left_on='Key', right_on='Key')
    # Binary targets for the two models: moneyline and over/under.
    gbg_and_odds['Home-Team-Win'] = (gbg_and_odds['Home Score']>gbg_and_odds['Away Score']).astype(int)
    gbg_and_odds['Over'] = ((gbg_and_odds['Home Score'] + gbg_and_odds['Away Score'])>gbg_and_odds['Total Score Close']).astype(int)

    if overwrite:
        file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
        gbg_and_odds.to_csv(file_path)

    return gbg_and_odds
Source/Build/nfl_data_py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Subproject commit e4988dc303bc441108dd11f4ae93a8200aab10e1
|
Source/Build/update.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import nfl_data_py.nfl_data_py as nfl
import build
import datetime as dt
import numpy as np
import pandas as pd
pd.set_option('chained_assignment',None)
pd.set_option('display.max_columns',None)
import os

# Resolve paths relative to this file so the script works from any CWD.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')

# get current season (Aug-Dec -> this calendar year, Jan-Jul -> previous year)
year = dt.datetime.now().year
month = dt.datetime.now().month
season = year if month in [8,9,10,11,12] else year-1

# update current season
# BUG FIX: previously hard-coded [2023] even though `season` was computed above,
# which would silently stop updating after the 2023 season.
gbg = build.build_gbg_data(get_seasons=[season], overwrite_seasons=[season])
gbg_and_odds = build.add_odds_data(gbg)
gbg_and_odds_this_year = gbg_and_odds.loc[gbg_and_odds['Season']==season]

file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
gbg_and_odds_this_year.to_csv(file_path)
Source/Data/gbg.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:518ee58f264900f457b6ab0deed9a664607c16bf399fa2a669fc484244c57a92
|
3 |
+
size 1792121
|
Source/Data/gbg_and_odds.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8ec2d7b26b490e1c28de9f9c40b4b4991f6f1ff7bbad0f3e994a7c5c375affe
|
3 |
+
size 1567692
|
Source/Data/gbg_and_odds_this_year.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b848b812a85a74ad20af51565784382f9a9cd97af3b65d77801dd1d009054f91
|
3 |
+
size 886
|
Source/Data/gbg_this_year.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61d1340b0f2d8f5d4cad8efa0dfa2246adb0748ded9f3841709bde80a7146c74
|
3 |
+
size 844
|
Source/Data/pbp.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:840929401e41f90255f27bb2002791d75ea1aaeee538d586743044fb5065ca96
|
3 |
+
size 247394694
|
Source/Data/pbp_this_year.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca7b56d8e82fad5c40ee396ec129c95f2f213864b190078d03a8ec665a0532c6
|
3 |
+
size 405815
|
Source/Models/__init__.py
ADDED
File without changes
|
Source/Models/xgboost_ML_75.4%.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Source/Models/xgboost_OU_59.3%.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Source/Pickles/team_abbreviation_to_name.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d457e4ca669b5000d270669b963ce286a7b8ff0f7139535c7d0bd6439fddd4f
|
3 |
+
size 910
|
Source/Pickles/team_name_to_abbreviation.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fdd24bc318fde8622b827dfaa76fdbba5849d11cb61fb99bee50adcebb20fdc1
|
3 |
+
size 903
|
Source/Pickles/test_games_ML.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0b6c58108f994d1f070c6ee85bba812da57d9395646c05e6bf3cb85a16b9f51
|
3 |
+
size 7376
|
Source/Pickles/test_games_OU.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69e2be2359534720fe42752b3e983e327e4e66a0a2bfa5924d4e750db458854e
|
3 |
+
size 7354
|
Source/Pickles/train_games_ML.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d13bfdb558d5753359f56ae4f2450e36ad8b21c10e1cc5e778b786759b83c62
|
3 |
+
size 60497
|
Source/Pickles/train_games_OU.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba28c20549cb0b08e68631bbdce808399dd1ad91f190ba18f6cbfdfeee0a4467
|
3 |
+
size 60519
|
Source/Predict/__pycache__/predict.cpython-311.pyc
ADDED
Binary file (18.8 kB). View file
|
|
Source/Predict/predict.py
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import xgboost as xgb
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import pickle as pkl
|
5 |
+
import os
|
6 |
+
import requests
|
7 |
+
from bs4 import BeautifulSoup
|
8 |
+
|
9 |
+
# Resolve project directories relative to this file so the module works
# regardless of the current working directory.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

# Current-season play-by-play data, loaded once at import time and shared by
# the functions below.
file_path = os.path.join(data_directory, 'pbp_this_year.csv')
pbp = pd.read_csv(file_path, index_col=0, low_memory=False)

# get team abbreviations: full name -> abbreviation, and its inverse.
file_path = os.path.join(pickle_directory, 'team_name_to_abbreviation.pkl')
with open(file_path, 'rb') as f:
    team_name_to_abbreviation = pkl.load(f)

file_path = os.path.join(pickle_directory, 'team_abbreviation_to_name.pkl')
with open(file_path, 'rb') as f:
    team_abbreviation_to_name = pkl.load(f)
27 |
+
def get_week():
    """Scrape nfl.com's schedule page and return (week, year) as ints.

    The first <h2> on the page carries the season year as its first word and
    the week number as its last word.
    """
    request_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Dnt': '1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
    }
    response = requests.get('https://www.nfl.com/schedules/', headers=request_headers)
    page = BeautifulSoup(response.text, 'html.parser')
    heading_words = page.find_all('h2')[0].getText().split(' ')
    current_year = heading_words[0]
    current_week = heading_words[-1]
    return int(current_week), int(current_year)
46 |
+
|
47 |
+
def get_games():
    """Scrape this week's NFL schedule (away team, home team, kickoff) from NBC Sports."""
    # pull from NBC
    url = 'https://www.nbcsports.com/nfl/schedule'
    df = pd.read_html(url)[0]
    # Team cells look like '<abbrev>\xa0<full name>'; keep everything after the nbsp.
    df['Away Team'] = [' '.join(i.split('\xa0')[1:]) for i in df['Away TeamAway Team']]
    df['Home Team'] = [' '.join(i.split('\xa0')[1:]) for i in df['Home TeamHome Team']]
    df['Date'] = pd.to_datetime(df['Game TimeGame Time'])
    df['Date'] = df['Date'].dt.strftime('%A %d/%m %I:%M %p')
    # Reformat 'Weekday dd/mm hh:mm AM' into 'Weekday m/d hh:mm' (day/month
    # swapped to US order, zero-padding stripped).
    # NOTE(review): the AM/PM token (x.split()[3]) is dropped and .capitalize()
    # lower-cases everything after the first character -- confirm intended.
    df['Date'] = df['Date'].apply(lambda x: f"{x.split()[0]} {int(x.split()[1].split('/')[1])}/{int(x.split()[1].split('/')[0])} {x.split()[2]}".capitalize())

    return df[['Away Team','Home Team','Date']]
59 |
+
|
60 |
+
def get_one_week(team_name,season,week):
    """
    Build the cumulative-stats feature row for one team entering a given week.

    Derives the same per-play columns as Source/Build/build.py from the
    module-level `pbp` DataFrame, aggregates to game level with running
    averages, and returns the row where GP == week.

    NOTE(review): unlike build.py's build_gbg_data, this does NOT .shift() the
    expanding stats, so the row for `week` includes that week's own game if it
    has been played -- confirm this is the intended inference-time behavior.
    """
    # create columns (offense = posteam, defense = defteam, from this team's view)
    team = pbp.loc[((pbp['home_team']==team_name) | (pbp['away_team']==team_name)) & (pbp['season']==season)]
    team['GP'] = team['week']
    team['W'] = [1 if r>0 and team_name==h else 1 if r<0 and team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
    team['L'] = [0 if r>0 and team_name==h else 0 if r<0 and team_name==a else 1 for r,a,h in team[['result','away_team','home_team']].values]
    team['W_PCT'] = team['W']/team['GP']
    team['TOP'] = [t if team_name==p else 0 for t,p in team[['TOP_seconds','posteam']].values]
    team['FGA'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','field_goal_attempt']].values]
    team['FGM'] = [1 if team_name==p and f=='made' else 0 for p,f in team[['posteam','field_goal_result']].values]
    team['FG_PCT'] = team['FGM']/team['FGA']
    team['PassTD'] = np.where((team['posteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
    team['RushTD'] = np.where((team['posteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
    team['PassTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
    team['RushTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
    team['PassYds'] = [y if p==team_name else 0 for p,y in team[['posteam','passing_yards']].values]
    team['RushYds'] = [y if p==team_name else 0 for p,y in team[['posteam','rushing_yards']].values]
    team['PassYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','passing_yards']].values]
    team['RushYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','rushing_yards']].values]
    team['Fum'] = np.where((team['defteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
    team['Fum_Allowed'] = np.where((team['posteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
    team['INT'] = np.where((team['defteam'] == team_name) & (team['interception'] == 1), 1, 0)
    team['INT_Allowed'] = np.where((team['posteam'] == team_name) & (team['interception'] == 1), 1, 0)
    team['Sacks'] = np.where((team['defteam'] == team_name) & (team['sack'] == 1), 1, 0)
    team['Sacks_Allowed'] = np.where((team['posteam'] == team_name) & (team['sack'] == 1), 1, 0)
    team['Penalties'] = np.where((team['penalty_team'] == team_name), 1, 0)
    team['FirstDowns'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','first_down']].values]
    team['3rdDownConverted'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_converted']].values]
    team['3rdDownFailed'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_failed']].values]
    team['3rdDownAllowed'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_converted']].values]
    team['3rdDownDefended'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_failed']].values]
    team['PTS'] = [ap if at==team_name else hp if ht==team_name else None for ht,at,hp,ap in team[['home_team','away_team','home_score','away_score']].values]
    team['PointDiff'] = [r if team_name==h else -r if team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]

    # aggregate from play-by-play to game-by-game
    features = {
        'GP':'mean',
        'W':'mean',
        'L':'mean',
        'W_PCT':'mean',
        'TOP':'sum',
        'FGA':'sum',
        'FGM':'sum',
        'FG_PCT':'mean',
        'PassTD':'sum',
        'RushTD':'sum',
        'PassTD_Allowed':'sum',
        'RushTD_Allowed':'sum',
        'PassYds':'sum',
        'RushYds':'sum',
        'PassYds_Allowed':'sum',
        'RushYds_Allowed':'sum',
        'Fum':'sum',
        'Fum_Allowed':'sum',
        'INT':'sum',
        'INT_Allowed':'sum',
        'Sacks':'sum',
        'Sacks_Allowed':'sum',
        'Penalties':'sum',
        'FirstDowns':'sum',
        '3rdDownConverted':'sum',
        '3rdDownFailed':'sum',
        '3rdDownAllowed':'sum',
        '3rdDownDefended':'sum',
        'PTS':'mean',
        'PointDiff':'mean'
    }
    game = team.groupby('game_id').agg(features).reset_index()
    # Cumulative record and running averages over the season so far.
    game[['W','L']] = game[['W','L']].expanding().sum()
    game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
    game['TEAM'] = team_name
    game['Season'] = season
    return game.loc[game['GP']==week]
134 |
+
|
135 |
+
def get_one_week_home_and_away(home,away,season,week):
    """Build one merged feature row for a matchup: home-team stats joined with
    away-team stats, away columns suffixed '.Away'."""
    home_stats = get_one_week(home, season, week)
    away_stats = get_one_week(away, season, week)
    away_stats.columns = [f'{col}.Away' for col in away_stats.columns]
    merged = home_stats.merge(away_stats, left_index=True, right_index=True)
    merged.drop(columns=['TEAM', 'TEAM.Away', 'Season.Away', 'game_id.Away'], inplace=True)
    return merged
+
|
144 |
+
def predict(home,away,season,week,total):
    """
    Predict one matchup with the saved XGBoost models.

    Parameters
    ----------
    home, away : str
        Full team names (keys of team_name_to_abbreviation).
    season, week : int
        Which week's cumulative stats to use as features.
    total : float
        The closing over/under line, fed to the model as 'Total Score Close'.

    Returns
    -------
    tuple[dict, dict]
        (moneyline, over_under); on any prediction failure each falls back to
        'N/A' values with the same keys and list shapes as the success case.
    """
    # finish preparing data
    home_abbrev = team_name_to_abbreviation[home]
    away_abbrev = team_name_to_abbreviation[away]
    gbg = get_one_week_home_and_away(home_abbrev,away_abbrev,season,week)
    gbg['Total Score Close'] = total

    matrix = xgb.DMatrix(gbg.drop(columns=['game_id','Season']).astype(float).values)

    # moneyline
    model = 'xgboost_ML_75.4%'
    file_path = os.path.join(model_directory, f'{model}.json')
    xgb_ml = xgb.Booster()
    xgb_ml.load_model(file_path)
    try:
        ml_predicted_proba = xgb_ml.predict(matrix)[0][1]
        winner_proba = max([ml_predicted_proba, 1-ml_predicted_proba])
        # Only call a winner when the model is confident; otherwise 'Toss-Up'.
        moneyline = {'Winner': [home if ml_predicted_proba>0.6 else away if ml_predicted_proba<0.4 else 'Toss-Up'],
                     'Probabilities':[winner_proba]}
    # FIX: was a bare `except:` (also caught KeyboardInterrupt/SystemExit);
    # fallback now mirrors the success branch's shape ('NA' -> ['N/A']).
    except Exception:
        moneyline = {'Winner': ['N/A'],
                     'Probabilities':['N/A']}

    # over/under
    model = 'xgboost_OU_59.3%'
    file_path = os.path.join(model_directory, f'{model}.json')
    xgb_ou = xgb.Booster()
    xgb_ou.load_model(file_path)
    try:
        ou_predicted_proba = xgb_ou.predict(matrix)[0][1]
        over_under = {'Over/Under': ['Over' if ou_predicted_proba>0.5 else 'Under'],
                      'Probability': [ou_predicted_proba]}
    # FIX: fallback key was 'Probabilities', inconsistent with the success
    # branch's 'Probability' (a KeyError trap for downstream readers).
    except Exception:
        over_under = {'Over/Under': ['N/A'],
                      'Probability': ['N/A']}

    return moneyline, over_under
182 |
+
|
183 |
+
def update_past_predictions():
    """
    Re-run predict() for every game in this season's play-by-play data and
    write the results to Data/predictions_this_year.csv.
    """
    file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
    gbg_and_odds_this_year = pd.read_csv(file_path, index_col=0, low_memory=False)
    # BUG FIX: dict(df[[...]]) builds {column name: Series}, not a game_id ->
    # total mapping; .values yields the (game_id, total) pairs (same idiom as
    # game_date_dict in Source/Build/build.py).
    total_dict = dict(gbg_and_odds_this_year[['game_id','Total Score Close']].values)
    games = pbp.drop_duplicates(subset='game_id')

    predictions = {}
    for _, i in games.iterrows():
        game_id = i['game_id']
        # BUG FIX: pbp stores team abbreviations, but predict() expects full
        # names (it looks them up in team_name_to_abbreviation). Translate
        # first -- confirm the two pickles round-trip as inverses.
        home = team_abbreviation_to_name[i['home_team']]
        away = team_abbreviation_to_name[i['away_team']]
        week = i['week']
        season = i['season']
        # Games missing from the odds file get no line; predict() still runs
        # (the model just sees NaN for 'Total Score Close').
        total = total_dict.get(game_id)
        predictions[game_id] = predict(home,away,season,week,total)

    predictions_df = pd.DataFrame(predictions)
    file_path = os.path.join(data_directory, 'predictions_this_year.csv')
    predictions_df.to_csv(file_path)
|
Source/Test/__init__.py
ADDED
File without changes
|
Source/Test/xgboost_ML.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import pickle as pkl

import matplotlib.pyplot as plt
import pandas as pd
import xgboost as xgb

# Evaluate the saved moneyline model on its held-out test games and plot the
# cumulative betting return when only betting at >=60% confidence.
model = 'xgboost_ML_75.4%'

current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

# Load the trained booster.
file_path = os.path.join(model_directory, f'{model}.json')
xgb_ml = xgb.Booster()
xgb_ml.load_model(file_path)

# IDs of the games held out when the model was trained.
file_path = os.path.join(pickle_directory, 'test_games_ML.pkl')
with open(file_path, 'rb') as f:
    test_games = pkl.load(f).tolist()

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
gbg_and_odds = pd.read_csv(file_path, index_col=0)
# .copy() so the column assignments below modify a real frame rather than a
# view of gbg_and_odds (avoids SettingWithCopyWarning / silent no-ops).
test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)].copy()
# Drop identifiers, labels, and outcome/odds columns before building the matrix.
test_data_matrix = xgb.DMatrix(
    test_data.drop(columns=['game_id', 'Over', 'Home-Team-Win', 'Season',
                            'home_team', 'away_team', 'game_date', 'Key',
                            'Home Score', 'Away Score', 'Home Odds Close',
                            'Away Odds Close', 'Home Winnings', 'Away Winnings'])
    .astype(float).values)

predicted_probas = xgb_ml.predict(test_data_matrix)
test_data['predicted_proba'] = predicted_probas[:, 1]  # P(home team wins)
test_data['prediction'] = (test_data['predicted_proba'] > 0.5).astype(int)
test_data['correct'] = test_data['Home-Team-Win'] == test_data['prediction']

# Only bet games where the model is at least 60% confident either way.
bets = test_data.loc[(test_data['predicted_proba'] > 0.6) |
                     (test_data['predicted_proba'] < 0.4)].copy()
# A correct call earns the home payout, a wrong one the away payout
# (the Winnings columns are signed, so losses are already negative).
bets['winnings'] = [h if c else a
                    for h, a, c in bets[['Home Winnings', 'Away Winnings', 'correct']].values]

# Dark-themed cumulative-return plot.
fig = plt.figure(facecolor='black')
ax = fig.add_subplot(1, 1, 1, facecolor='black')

# Plot data with line color as RGB(0, 128, 0)
ax.plot(bets['winnings'].cumsum().values * 100, linewidth=3,
        color=(0 / 255, 128 / 255, 0 / 255))

# Set title and labels
ax.set_title('MARCI 3.0 - MoneyLine w/ 60% Confidence Threshold', color='white')
ax.set_xlabel('Games Bet On', color='white')
ax.set_ylabel('Return (%)', color='white')

# Change tick colors to white
ax.tick_params(axis='x', colors='white')
ax.tick_params(axis='y', colors='white')

# Change axis edge colors
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_color('white')

plt.savefig(f'{model}_dark.png', facecolor='black')
|
Source/Test/xgboost_ML_75.4%.png
ADDED
Source/Test/xgboost_ML_75.4%_dark.png
ADDED
Source/Test/xgboost_OU.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import pickle as pkl

import matplotlib.pyplot as plt
import pandas as pd
import xgboost as xgb

# Evaluate the saved over/under model on its held-out test games and plot the
# cumulative betting return assuming a flat -110-style payout (+0.91 / -1).
model = 'xgboost_OU_59.3%'

current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

# Load the trained booster.
file_path = os.path.join(model_directory, f'{model}.json')
xgb_ou = xgb.Booster()
xgb_ou.load_model(file_path)

# IDs of the games held out when the model was trained.
file_path = os.path.join(pickle_directory, 'test_games_OU.pkl')
with open(file_path, 'rb') as f:
    test_games = pkl.load(f).tolist()

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
gbg_and_odds = pd.read_csv(file_path, index_col=0)
# .copy() so the column assignments below modify a real frame rather than a
# view of gbg_and_odds (avoids SettingWithCopyWarning / silent no-ops).
test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)].copy()
# Drop identifiers, labels, and outcome/odds columns before building the matrix.
test_data_matrix = xgb.DMatrix(
    test_data.drop(columns=['game_id', 'Over', 'Home-Team-Win', 'Season',
                            'home_team', 'away_team', 'game_date', 'Key',
                            'Home Score', 'Away Score', 'Home Odds Close',
                            'Away Odds Close', 'Home Winnings', 'Away Winnings'])
    .astype(float).values)

predicted_probas = xgb_ou.predict(test_data_matrix)
test_data['predicted_proba'] = predicted_probas[:, 1]  # P(over)
test_data['prediction'] = (test_data['predicted_proba'] > 0.5).astype(int)
test_data['correct'] = test_data['Over'] == test_data['prediction']

# Bet every test game (the confidence-threshold filter is deliberately
# disabled for the over/under model).
bets = test_data
# Iterate the boolean Series directly; the old bets[['correct']].values form
# leaned on deprecated truthiness of single-element numpy arrays.
bets['winnings'] = [0.91 if c else -1 for c in bets['correct']]

# Dark-themed cumulative-return plot.
fig = plt.figure(facecolor='black')
ax = fig.add_subplot(1, 1, 1, facecolor='black')

# Plot data with line color as RGB(0, 128, 0)
ax.plot(bets['winnings'].cumsum().values * 100, linewidth=3,
        color=(0 / 255, 128 / 255, 0 / 255))

# Set title and labels
ax.set_title('MARCI 3.0 - Over/Under', color='white')
ax.set_xlabel('Games Bet On', color='white')
ax.set_ylabel('Return (%)', color='white')

# Change tick colors to white
ax.tick_params(axis='x', colors='white')
ax.tick_params(axis='y', colors='white')

# Change axis edge colors
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_color('white')

plt.savefig(f'{model}_dark.png', facecolor='black')
|
Source/Test/xgboost_OU_59.3%.png
ADDED
Source/Test/xgboost_OU_59.3%_dark.png
ADDED
Source/Train/xgboost_ML.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import pickle as pkl

import numpy as np
import pandas as pd
import xgboost as xgb
from IPython.display import clear_output
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Train the moneyline (home-team-win) classifier over 100 random train/test
# splits, persisting the split and model only when accuracy beats all
# previous runs.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
data = pd.read_csv(file_path, index_col=0).dropna()

# Target: 1 if the home team won.  Drop the labels plus every column that
# would leak the game's outcome or closing odds into the features.
margin = data['Home-Team-Win']
data.drop(columns=['Home-Team-Win', 'Over', 'Season', 'home_team', 'away_team',
                   'game_date', 'Key', 'Home Score', 'Away Score',
                   'Home Odds Close', 'Away Odds Close',
                   'Home Winnings', 'Away Winnings'], inplace=True)

acc_results = []

for _ in tqdm(range(100)):
    X_train, X_test, y_train, y_test = train_test_split(data, margin, test_size=.1)

    # Remember which games landed in each split (so the held-out set can be
    # reconstructed later), then drop the identifier before training.
    train_games = X_train['game_id']
    test_games = X_test['game_id']
    X_train = X_train.drop(columns=['game_id'])
    X_test = X_test.drop(columns=['game_id'])

    train = xgb.DMatrix(X_train.astype(float).values, label=y_train)
    test = xgb.DMatrix(X_test.astype(float).values, label=y_test)

    param = {
        'max_depth': 2,
        'eta': 0.01,
        'objective': 'multi:softprob',
        'num_class': 2
    }
    epochs = 500

    model = xgb.train(param, train, epochs)
    # softprob yields one probability per class; the prediction is the argmax.
    predicted_probas = model.predict(test)
    y = np.argmax(predicted_probas, axis=1)

    acc = round(accuracy_score(y_test, y) * 100, 1)
    acc_results.append(acc)
    clear_output(wait=True)
    print(f"Best accuracy: {max(acc_results)}%")

    # Only save results if they are the best so far.
    if acc == max(acc_results):
        file_path = os.path.join(pickle_directory, 'train_games_ML.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(train_games, f)

        file_path = os.path.join(pickle_directory, 'test_games_ML.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(test_games, f)

        file_path = os.path.join(model_directory, f'xgboost_ML_{acc}%.json')
        model.save_model(file_path)

print('Done')
|
Source/Train/xgboost_OU.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import pickle as pkl

import numpy as np
import pandas as pd
import xgboost as xgb
from IPython.display import clear_output
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Train the over/under classifier over 100 random train/test splits,
# persisting the split and model only when accuracy beats all previous runs.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
data = pd.read_csv(file_path, index_col=0).dropna()

# Target: the 'Over' label.  Drop the labels plus every column that would
# leak the game's outcome or closing odds into the features.
OU = data['Over']
data.drop(columns=['Home-Team-Win', 'Over', 'Season', 'home_team', 'away_team',
                   'game_date', 'Key', 'Home Score', 'Away Score',
                   'Home Odds Close', 'Away Odds Close',
                   'Home Winnings', 'Away Winnings'], inplace=True)

acc_results = []

for _ in tqdm(range(100)):
    X_train, X_test, y_train, y_test = train_test_split(data, OU, test_size=.1)

    # Remember which games landed in each split (so the held-out set can be
    # reconstructed later), then drop the identifier before training.
    train_games = X_train['game_id']
    test_games = X_test['game_id']
    X_train = X_train.drop(columns=['game_id'])
    X_test = X_test.drop(columns=['game_id'])

    train = xgb.DMatrix(X_train.astype(float).values, label=y_train)
    test = xgb.DMatrix(X_test.astype(float).values, label=y_test)

    param = {
        'max_depth': 6,
        'eta': 0.05,
        'objective': 'multi:softprob',
        # NOTE(review): downstream code only reads class-1 probability, which
        # suggests a binary label; num_class=3 presumably reserves a class for
        # pushes (total lands exactly on the line) — confirm against the data.
        'num_class': 3
    }
    epochs = 300

    model = xgb.train(param, train, epochs)
    # softprob yields one probability per class; the prediction is the argmax.
    predicted_probas = model.predict(test)
    y = np.argmax(predicted_probas, axis=1)

    acc = round(accuracy_score(y_test, y) * 100, 1)
    acc_results.append(acc)
    clear_output(wait=True)
    print(f"Best accuracy: {max(acc_results)}%")

    # Only save results if they are the best so far.
    if acc == max(acc_results):
        file_path = os.path.join(pickle_directory, 'train_games_OU.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(train_games, f)

        file_path = os.path.join(pickle_directory, 'test_games_OU.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(test_games, f)

        file_path = os.path.join(model_directory, f'xgboost_OU_{acc}%.json')
        model.save_model(file_path)

print('Done')
|
Static/Arizona Cardinals.webp
ADDED
Static/Atlanta Falcons.webp
ADDED
Static/Baltimore Ravens.webp
ADDED
Static/Buffalo Bills.webp
ADDED
Static/Carolina Panthers.webp
ADDED
Static/Chicago Bears.webp
ADDED
Static/Cincinnati Bengals.webp
ADDED
Static/Cleveland Browns.webp
ADDED
Static/Dallas Cowboys.webp
ADDED
Static/Denver Broncos.webp
ADDED
Static/Detroit Lions.webp
ADDED
Static/Green Bay Packers.webp
ADDED
Static/Houston Texans.webp
ADDED
Static/Indianapolis Colts.webp
ADDED
Static/Jacksonville Jaguars.webp
ADDED
Static/Kansas City Chiefs.webp
ADDED
Static/Las Vegas Raiders.webp
ADDED