Spaces:

Chittrarasu
/

Cricket-match-prediction-FastAPI

Sleeping

App Files Files Community

sivapriya175 committed on Mar 19

Commit

9f8cfb6

1 Parent(s): e11191e

deploy backend files

Browse files

Files changed (1) hide show

models/train_model.py +22 -4

models/train_model.py CHANGED Viewed

@@ -6,7 +6,7 @@ from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_squared_error, accuracy_score, r2_score
 from sklearn.preprocessing import StandardScaler
-# Load datasets
 ball_df = pd.read_csv('data/cleaned_ball_data.csv')
 match_df = pd.read_csv('data/cleaned_match_data.csv')
@@ -14,7 +14,25 @@ match_df = pd.read_csv('data/cleaned_match_data.csv')
 match_df['date'] = pd.to_datetime(match_df['date'], errors='coerce')
 ball_df['start_date'] = pd.to_datetime(ball_df['start_date'], errors='coerce')
-# Train Player Score Model (Without Saving .pkl)
 def train_player_score_model():
     player_runs = ball_df.groupby(['match_id', 'striker'])['runs_off_bat'].sum().reset_index()
     player_runs.rename(columns={'runs_off_bat': 'player_total'}, inplace=True)
@@ -41,7 +59,7 @@ def train_player_score_model():
     return model, scaler
-# Train Team Performance Model (Without Saving .pkl)
 def train_team_performance_model():
     data = match_df[['team1', 'team2', 'winner', 'team1_total', 'team2_total', 'venue', 'city', 'toss_winner', 'toss_decision']].dropna()
     data['team1_index'] = data['team1'].astype('category').cat.codes
@@ -64,6 +82,6 @@ def train_team_performance_model():
     return win_model, score_model
-# Train the models dynamically (without .pkl files)
 player_score_model, player_scaler = train_player_score_model()
 team_win_model, team_score_model = train_team_performance_model()

 from sklearn.metrics import mean_squared_error, accuracy_score, r2_score
 from sklearn.preprocessing import StandardScaler
+# 🔹 Load datasets
 ball_df = pd.read_csv('data/cleaned_ball_data.csv')
 match_df = pd.read_csv('data/cleaned_match_data.csv')
 match_df['date'] = pd.to_datetime(match_df['date'], errors='coerce')
 ball_df['start_date'] = pd.to_datetime(ball_df['start_date'], errors='coerce')
+# 🔹 Compute team total scores and merge correctly
+team_scores = ball_df.groupby(['match_id', 'batting_team'])['total_runs'].sum().reset_index()
+team_scores.rename(columns={'total_runs': 'team_total'}, inplace=True)
+# Merge team scores with match_df
+match_df = match_df.merge(team_scores, left_on=['id', 'team1'], right_on=['match_id', 'batting_team'], how='left')
+match_df.rename(columns={'team_total': 'team1_total'}, inplace=True)
+match_df = match_df.merge(team_scores, left_on=['id', 'team2'], right_on=['match_id', 'batting_team'], how='left')
+match_df.rename(columns={'team_total': 'team2_total'}, inplace=True)
+# Fill missing values with 0 to avoid KeyError
+match_df['team1_total'] = match_df['team1_total'].fillna(0)
+match_df['team2_total'] = match_df['team2_total'].fillna(0)
+# Drop unnecessary columns
+match_df.drop(columns=['batting_team', 'match_id'], errors='ignore', inplace=True)
+# 🔹 Train Player Score Model
 def train_player_score_model():
     player_runs = ball_df.groupby(['match_id', 'striker'])['runs_off_bat'].sum().reset_index()
     player_runs.rename(columns={'runs_off_bat': 'player_total'}, inplace=True)
     return model, scaler
+# 🔹 Train Team Performance Model
 def train_team_performance_model():
     data = match_df[['team1', 'team2', 'winner', 'team1_total', 'team2_total', 'venue', 'city', 'toss_winner', 'toss_decision']].dropna()
     data['team1_index'] = data['team1'].astype('category').cat.codes
     return win_model, score_model
+# 🔹 Train models dynamically
 player_score_model, player_scaler = train_player_score_model()
 team_win_model, team_score_model = train_team_performance_model()