marci / Source /Build /update.py
BraydenMoore's picture
update update.py
f12cc2b
import nfl_data_py.nfl_data_py as nfl
import build
import datetime as dt
import numpy as np
import io
import pandas as pd
pd.set_option('chained_assignment',None)
pd.set_option('display.max_columns',None)
import os
import pickle as pkl
import requests
from bs4 import BeautifulSoup
# Resolve the project folders relative to this script: 'Data' and 'Pickles'
# live in the parent of the folder that contains this file.
current_directory, _script_name = os.path.split(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
pickle_directory = os.path.join(parent_directory, 'Pickles')


def _load_pickle(path):
    """Return the object unpickled from the file at *path*."""
    with open(path, 'rb') as handle:
        return pkl.load(handle)


# get team abbreviations
# Two lookup objects are read from the Pickles folder, one per direction
# (name -> abbreviation and abbreviation -> name).
file_path = os.path.join(pickle_directory, 'team_name_to_abbreviation.pkl')
team_name_to_abbreviation = _load_pickle(file_path)
file_path = os.path.join(pickle_directory, 'team_abbreviation_to_name.pkl')
team_abbreviation_to_name = _load_pickle(file_path)
# get current season
# Take a single clock snapshot so that year and month always describe the
# same instant; two separate now() calls could straddle a year rollover and
# yield a season that is one year off.
_now = dt.datetime.now()
year = _now.year
month = _now.month
# August through December count towards the season that starts in the
# current year; January through July still belong to the previous year's season.
current_season = year if month >= 8 else year - 1
# get schedule
file_path = os.path.join(pickle_directory, 'schedule.pkl')
url = 'https://www.nbcsports.com/nfl/schedule'
print('Getting schedule.\n')
# read_html returns a list of DataFrames (one per HTML table it finds);
# that list is pickled unchanged.
df = pd.read_html(url)
with open(file_path, 'wb') as f:
    pkl.dump(df, f)
def get_week():
    """Scrape the current week number and year from the NFL schedules page.

    The text of the first ``<h2>`` heading on https://www.nfl.com/schedules/
    is split on spaces: the first token is taken as the year and the last
    token as the week. A heading whose last token is ``'CARD'`` is reported
    as week 18.

    Returns:
        tuple[int, int]: ``(week, year)``.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
        IndexError: If the page contains no ``<h2>`` heading.
        ValueError: If the year or week token cannot be converted to int.
    """
    # Browser-like request headers.
    # NOTE(review): presumably needed because the site rejects the default
    # client headers -- confirm before trimming.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Dnt': '1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
    }
    url = 'https://www.nfl.com/schedules/'
    # Without a timeout requests waits indefinitely on an unresponsive server.
    resp = requests.get(url, headers=headers, timeout=30)
    # Fail on 4xx/5xx responses instead of parsing an error page.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'html.parser')

    heading = soup.find('h2')
    if heading is None:
        # Same exception type as indexing an empty result list, but with a
        # message that says what was missing.
        raise IndexError(f'No <h2> heading found at {url}')

    tokens = heading.getText().split(' ')
    year = tokens[0]
    week = tokens[-1]
    # NOTE(review): presumably matches a "... WILD CARD" heading -- confirm.
    # Any other non-numeric last token raises ValueError in int() below.
    if week == 'CARD':
        week = 18
    return int(week), int(year)
def get_lines(season):
    """Scrape public betting trends into one DataFrame row per matchup.

    Downloads the sportsbettingdime.com NFL public-betting-trends page and
    parses its HTML tables. The first table's first row supplies the column
    labels; every table between the first and the last is treated as one
    matchup whose row 0 is the away team and row 1 is the home team.

    Args:
        season: Value stored in the ``season`` column and used as the first
            component of each row's ``key``.

    Returns:
        pandas.DataFrame: Columns ``season``, ``week``, ``home_team``,
        ``away_team``, ``away_spread``, ``money_on_away_ats``,
        ``bets_on_away_ats``, ``away_moneyline``, ``money_on_away_ml``,
        ``bets_on_away_ml``, ``over_under``, ``money_on_over``,
        ``bets_on_over`` and ``key`` (``'{season}_{week}_{away}_{home}'``).
        Percentages are returned as fractions (e.g. ``'55%'`` -> ``0.55``).
        ``week`` is the string scraped from the page, not an int.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
        IndexError: If the page contains no ``<h2>`` heading.
        KeyError: If no matchup tables are found, because the resulting
            empty frame has no columns to build ``key`` from.
    """
    url = 'https://www.sportsbettingdime.com/nfl/public-betting-trends/'
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=30)
    # Fail on 4xx/5xx responses instead of parsing an error page.
    response.raise_for_status()
    # Name the parser explicitly (as get_week does) so parsing does not
    # depend on which optional parsers happen to be installed.
    html = BeautifulSoup(response.text, 'html.parser')
    # The week label is the last word of the first <h2> heading.
    week = html.find_all('h2')[0].get_text().split(' ')[-1]

    df = pd.read_html(io.StringIO(response.text))
    # Column labels come from the first row of the first table; the label at
    # index 2 is dropped.
    # NOTE(review): presumably the header has one label the per-game tables
    # lack -- confirm against the live page.
    columns = list(df[0].loc[0,:].values)
    columns = columns[:2] + columns[3:]

    def percent_to_fraction(text):
        """Convert a string such as '55%' to 0.55."""
        return int(text.replace('%','')) / 100

    data_list = []
    for data in df[1:-1]:
        data.columns = columns
        # Keep only the trailing run of capital letters of each matchup cell.
        data['Matchup'] = data['Matchup'].str.extract('([A-Z]+)[^A-Za-z]*$', expand=False)
        data_dict = {
            'season' : season,
            'week' : week,
            'home_team' : data['Matchup'][1],
            'away_team' : data['Matchup'][0],
            'away_spread' : float(data.iloc[0,4].replace('+','')),
            'money_on_away_ats' : percent_to_fraction(data.iloc[0,5]),
            'bets_on_away_ats' : percent_to_fraction(data.iloc[0,6]),
            'away_moneyline' : int(data['moneyline'][0].replace('+','')),
            'money_on_away_ml' : percent_to_fraction(data.iloc[0,8]),
            'bets_on_away_ml' : percent_to_fraction(data.iloc[0,9]),
            # Strip the 'o'/'u' markers from both rows and average them.
            'over_under' : data['total'].str.replace('o','').str.replace('u','').astype(float).mean(),
            'money_on_over' : percent_to_fraction(data.iloc[0,11]),
            'bets_on_over' : percent_to_fraction(data.iloc[0,12])
        }
        data_list.append(data_dict)

    betting_data = pd.DataFrame(data_list)
    betting_data['key'] = [f'{season}_{week}_{away}_{home}' for season, week, away, home in betting_data[['season','week','away_team','home_team']].values]
    return betting_data
# Only the week number from get_week() is used; the stored 'year' is the
# date-derived current_season rather than the year scraped from the page.
current_week, _scraped_year = get_week()
the_week = {'week': current_week, 'year': current_season}

# Persist the week/season marker in the Pickles folder.
file_path = os.path.join(pickle_directory, 'the_week.pkl')
with open(file_path, 'wb') as f:
    pkl.dump(the_week, f)
# update current season
build.build_gbg_data(get_seasons=[current_season])
build.add_odds_data()

# get winners
# Keep the first play-by-play row of each game_id, i.e. one row per game.
pbp = build.get_pbp_data([current_season]).drop_duplicates(subset='game_id')
# game_id is split on '_' into season, week, away and home parts.
pbp[['season','week','away','home']] = pbp['game_id'].str.split('_', expand=True)

# Work on an explicit copy so the column assignments below never touch pbp.
games = pbp[['game_id','away_score','home_score','season','week','away','home']].copy()
_int_columns = ['away_score','home_score','season','week']
games[_int_columns] = games[_int_columns].astype(int)

# Translate team abbreviations to full names.
games['away_team'] = games['away'].map(team_abbreviation_to_name)
games['home_team'] = games['home'].map(team_abbreviation_to_name)
games['total'] = games[['away_score','home_score']].sum(axis=1)

# The higher score wins; equal scores are recorded as 'Tie'.
games['winner'] = [
    away_name if away_points > home_points
    else home_name if home_points > away_points
    else 'Tie'
    for away_name, home_name, away_points, home_points in zip(
        games['away_team'], games['home_team'],
        games['away_score'], games['home_score'],
    )
]

file_path = os.path.join(data_directory, 'results.csv')
games.to_csv(file_path, columns=['game_id','total','winner'], index=False)