"""Feature preparation and XGBoost scoring for upcoming NFL games.

At import time this module loads ``Data/gbg_this_year.csv`` and the two team
abbreviation pickles from ``Pickles/`` (both resolved relative to the parent
of this file's directory).  It exposes:

* ``get_week``     - scrape the current week/season from nfl.com
* ``get_games``    - scrape the schedule table from nbcsports.com
* ``get_one_week`` - build the single-row feature frame for one matchup
* ``predict``      - score a matchup with the moneyline and over/under models
"""
import logging
import os
import pickle as pkl

import numpy as np
import pandas as pd
import requests
import xgboost as xgb
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

file_path = os.path.join(data_directory, 'gbg_this_year.csv')
gbg = pd.read_csv(file_path, low_memory=False)

# get team abbreviations
# NOTE: pickle.load executes arbitrary code embedded in the file; these
# pickles must only ever come from this project's own Pickles directory.
file_path = os.path.join(pickle_directory, 'team_name_to_abbreviation.pkl')
with open(file_path, 'rb') as f:
    team_name_to_abbreviation = pkl.load(f)

file_path = os.path.join(pickle_directory, 'team_abbreviation_to_name.pkl')
with open(file_path, 'rb') as f:
    team_abbreviation_to_name = pkl.load(f)

# Seconds to wait on nfl.com before giving up instead of hanging forever.
_REQUEST_TIMEOUT_SECONDS = 30

_NFL_SCHEDULE_URL = 'https://www.nfl.com/schedules/'
_NFL_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Dnt': '1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
}

_ML_MODEL_NAME = 'xgboost_ML_no_odds_69.8%'
_OU_MODEL_NAME = 'xgboost_OU_no_odds_60.8%'


def get_week():
    """Scrape the current week number and season year from nfl.com.

    The first ``<h2>`` on the schedules page is split on single spaces; its
    first token is read as the year and its last token as the week.

    Returns:
        tuple: ``(week, year)``, both as ``int``.

    Raises:
        requests.RequestException: on network failure or timeout.
        IndexError: if the page contains no ``<h2>`` element.
        ValueError: if the extracted tokens are not integers.
    """
    resp = requests.get(
        _NFL_SCHEDULE_URL,
        headers=_NFL_REQUEST_HEADERS,
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    soup = BeautifulSoup(resp.text, 'html.parser')
    heading_tokens = soup.find_all('h2')[0].getText().split(' ')
    year = heading_tokens[0]
    week = heading_tokens[-1]
    return int(week), int(year)


def _drop_first_nbsp_token(cell):
    """Drop everything up to the first non-breaking space and re-join the rest with spaces."""
    return ' '.join(cell.split('\xa0')[1:])


def _reformat_date(stamp):
    """Turn ``'<Weekday> dd/mm HH:MM <AM|PM>'`` into ``'<Weekday> m/d HH:MM'``.

    Day and month are swapped and stripped of leading zeros.
    NOTE(review): the AM/PM token is not carried into the result; this is the
    existing output format and is kept as-is - confirm it is intentional.
    """
    parts = stamp.split()
    day, month = parts[1].split('/')[:2]
    return f"{parts[0]} {int(month)}/{int(day)} {parts[2]}".capitalize()


def get_games():
    """Scrape the first schedule table from nbcsports.com.

    Returns:
        pandas.DataFrame: columns ``'Away Team'``, ``'Home Team'`` and
        ``'Date'`` (a display string produced by ``_reformat_date``).
    """
    # pull from NBC
    url = 'https://www.nbcsports.com/nfl/schedule'
    df = pd.read_html(url)[0]

    df['Away Team'] = [_drop_first_nbsp_token(cell) for cell in df['Away TeamAway Team']]
    df['Home Team'] = [_drop_first_nbsp_token(cell) for cell in df['Home TeamHome Team']]

    df['Date'] = pd.to_datetime(df['Game TimeGame Time'])
    df['Date'] = df['Date'].dt.strftime('%A %d/%m %I:%M %p')
    df['Date'] = df['Date'].apply(_reformat_date)

    return df[['Away Team', 'Home Team', 'Date']]


def _team_features(team, season, week):
    """Select ``team``'s row for ``GP == week - 1`` and keep only its own side's columns.

    When the team was the home side of that row the columns without
    ``'.Away'`` are kept, otherwise the ``'.Away'`` columns are kept; in both
    cases the ``'.Away'`` marker is removed from the returned column names.

    Raises:
        ValueError: via ``Series.item()`` when the filter does not match
            exactly one row.
    """
    involved = (gbg['away_team'] == team) | (gbg['home_team'] == team)
    rows = gbg.loc[involved & (gbg['Season'] == season) & (gbg['GP'] == week - 1)]

    if rows['home_team'].item() == team:
        wanted = [col for col in rows.columns if '.Away' not in col]
    else:
        wanted = [col for col in rows.columns if '.Away' in col]

    rows = rows[wanted]
    rows.columns = [col.replace('.Away', '') for col in rows.columns]
    return rows


def get_one_week(home, away, season, week):
    """Build the merged feature row for a ``home`` vs ``away`` matchup.

    Args:
        home: home team identifier as stored in ``gbg['home_team']`` /
            ``gbg['away_team']``.
        away: away team identifier, same convention.
        season: value matched against ``gbg['Season']``.
        week: upcoming week; rows with ``GP == week - 1`` are used.

    Returns:
        pandas.DataFrame: the home row merged with the away row (away columns
        suffixed ``'.Away'``) minus the identifier columns, or an empty
        DataFrame when a ``ValueError`` is raised while assembling it (for
        example when a team does not have exactly one matching row).
    """
    drop_columns = ['game_id', 'Season', 'home_team', 'away_team', 'game_date']
    try:
        home_df = _team_features(home, season, week)

        away_df = _team_features(away, season, week)
        away_df.columns = [col + '.Away' for col in away_df.columns]

        merged = home_df.merge(away_df, left_on='GP', right_on='GP.Away')
        return merged.drop(columns=drop_columns)
    except ValueError:
        return pd.DataFrame()


def _load_booster(model_name):
    """Load ``Models/<model_name>.json`` into a fresh ``xgb.Booster``."""
    booster = xgb.Booster()
    booster.load_model(os.path.join(model_directory, f'{model_name}.json'))
    return booster


def _moneyline_unavailable():
    """Fallback moneyline payload used when no prediction can be made."""
    return {'Winner': 'NA', 'Probabilities': ['N/A']}


def _over_under_unavailable():
    """Fallback over/under payload used when no prediction can be made."""
    return {'Over/Under': 'N/A', 'Probability': ['N/A']}


def predict(home, away, season, week, total):
    """Score one matchup with the moneyline and over/under models.

    Args:
        home: home team name; must be a key of ``team_name_to_abbreviation``.
        away: away team name; must be a key of ``team_name_to_abbreviation``.
        season: season passed through to ``get_one_week``.
        week: week passed through to ``get_one_week``.
        total: value stored in the ``'Total Score Close'`` feature column.

    Returns:
        tuple: ``(game_id, moneyline, over_under)`` where

        * ``game_id`` is ``'<season>_<week>_<away>_<home>'``;
        * ``moneyline`` is ``{'Winner': [name], 'Probabilities': [p]}`` - the
          home team when the model's index-1 probability exceeds 0.6, the
          away team when it is below 0.4, otherwise ``'Toss-Up'``;
        * ``over_under`` is ``{'Over/Under': ['Over'|'Under'],
          'Probability': [p]}`` using a 0.5 threshold.

        When features are unavailable or a model fails to score them, the
        corresponding dict is the ``'NA'`` / ``'N/A'`` fallback payload.

    Raises:
        KeyError: if ``home`` or ``away`` is not a known team name.
    """
    # finish preparing data
    home_abbrev = team_name_to_abbreviation[home]
    away_abbrev = team_name_to_abbreviation[away]
    data = get_one_week(home_abbrev, away_abbrev, season, week)

    # create game id to save predictions
    game_id = f'{season}_{week}_{away}_{home}'

    # No feature row means nothing to score: return the fallbacks directly
    # rather than relying on the models failing on an empty matrix.
    if data.empty:
        return game_id, _moneyline_unavailable(), _over_under_unavailable()

    data['Total Score Close'] = total
    matrix = xgb.DMatrix(data.astype(float).values)

    # moneyline
    xgb_ml = _load_booster(_ML_MODEL_NAME)
    try:
        ml_predicted_proba = xgb_ml.predict(matrix)[0][1]
        winner_proba = max([ml_predicted_proba, 1 - ml_predicted_proba]).item()
        if ml_predicted_proba > 0.6:
            winner = home
        elif ml_predicted_proba < 0.4:
            winner = away
        else:
            winner = 'Toss-Up'
        moneyline = {'Winner': [winner], 'Probabilities': [winner_proba]}
    except Exception:
        # Best-effort: keep returning the fallback, but leave a trace and do
        # not swallow KeyboardInterrupt/SystemExit like a bare except would.
        logger.exception('Moneyline prediction failed for %s', game_id)
        moneyline = _moneyline_unavailable()

    # over/under
    xgb_ou = _load_booster(_OU_MODEL_NAME)
    try:
        ou_predicted_proba = xgb_ou.predict(matrix)[0][1]
        ou_proba = max([ou_predicted_proba, 1 - ou_predicted_proba]).item()
        over_under = {
            'Over/Under': ['Over' if ou_predicted_proba > 0.5 else 'Under'],
            'Probability': [ou_proba],
        }
    except Exception:
        logger.exception('Over/under prediction failed for %s', game_id)
        over_under = _over_under_unavailable()

    return game_id, moneyline, over_under