sivapriya175
committed on
Commit
·
9f8cfb6
1
Parent(s):
e11191e
deploy backend files
Browse files- models/train_model.py +22 -4
models/train_model.py
CHANGED
@@ -6,7 +6,7 @@ from sklearn.model_selection import train_test_split
|
|
6 |
from sklearn.metrics import mean_squared_error, accuracy_score, r2_score
|
7 |
from sklearn.preprocessing import StandardScaler
|
8 |
|
9 |
-
# Load datasets
|
10 |
ball_df = pd.read_csv('data/cleaned_ball_data.csv')
|
11 |
match_df = pd.read_csv('data/cleaned_match_data.csv')
|
12 |
|
@@ -14,7 +14,25 @@ match_df = pd.read_csv('data/cleaned_match_data.csv')
|
|
14 |
match_df['date'] = pd.to_datetime(match_df['date'], errors='coerce')
|
15 |
ball_df['start_date'] = pd.to_datetime(ball_df['start_date'], errors='coerce')
|
16 |
|
17 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
def train_player_score_model():
|
19 |
player_runs = ball_df.groupby(['match_id', 'striker'])['runs_off_bat'].sum().reset_index()
|
20 |
player_runs.rename(columns={'runs_off_bat': 'player_total'}, inplace=True)
|
@@ -41,7 +59,7 @@ def train_player_score_model():
|
|
41 |
|
42 |
return model, scaler
|
43 |
|
44 |
-
# Train Team Performance Model
|
45 |
def train_team_performance_model():
|
46 |
data = match_df[['team1', 'team2', 'winner', 'team1_total', 'team2_total', 'venue', 'city', 'toss_winner', 'toss_decision']].dropna()
|
47 |
data['team1_index'] = data['team1'].astype('category').cat.codes
|
@@ -64,6 +82,6 @@ def train_team_performance_model():
|
|
64 |
|
65 |
return win_model, score_model
|
66 |
|
67 |
-
# Train
|
68 |
player_score_model, player_scaler = train_player_score_model()
|
69 |
team_win_model, team_score_model = train_team_performance_model()
|
|
|
6 |
from sklearn.metrics import mean_squared_error, accuracy_score, r2_score
|
7 |
from sklearn.preprocessing import StandardScaler
|
8 |
|
9 |
+
# 🔹 Load datasets
|
10 |
ball_df = pd.read_csv('data/cleaned_ball_data.csv')
|
11 |
match_df = pd.read_csv('data/cleaned_match_data.csv')
|
12 |
|
|
|
14 |
match_df['date'] = pd.to_datetime(match_df['date'], errors='coerce')
|
15 |
ball_df['start_date'] = pd.to_datetime(ball_df['start_date'], errors='coerce')
|
16 |
|
17 |
+
# 🔹 Compute team total scores and merge correctly
|
18 |
+
team_scores = ball_df.groupby(['match_id', 'batting_team'])['total_runs'].sum().reset_index()
|
19 |
+
team_scores.rename(columns={'total_runs': 'team_total'}, inplace=True)
|
20 |
+
|
21 |
+
# Merge team scores with match_df
|
22 |
+
match_df = match_df.merge(team_scores, left_on=['id', 'team1'], right_on=['match_id', 'batting_team'], how='left')
|
23 |
+
match_df.rename(columns={'team_total': 'team1_total'}, inplace=True)
|
24 |
+
|
25 |
+
match_df = match_df.merge(team_scores, left_on=['id', 'team2'], right_on=['match_id', 'batting_team'], how='left')
|
26 |
+
match_df.rename(columns={'team_total': 'team2_total'}, inplace=True)
|
27 |
+
|
28 |
+
# Fill missing team totals with 0 so those rows are not removed by the later dropna()
|
29 |
+
match_df['team1_total'] = match_df['team1_total'].fillna(0)
|
30 |
+
match_df['team2_total'] = match_df['team2_total'].fillna(0)
|
31 |
+
|
32 |
+
# Drop unnecessary columns
|
33 |
+
match_df.drop(columns=['batting_team', 'match_id'], errors='ignore', inplace=True)
|
34 |
+
|
35 |
+
# 🔹 Train Player Score Model
|
36 |
def train_player_score_model():
|
37 |
player_runs = ball_df.groupby(['match_id', 'striker'])['runs_off_bat'].sum().reset_index()
|
38 |
player_runs.rename(columns={'runs_off_bat': 'player_total'}, inplace=True)
|
|
|
59 |
|
60 |
return model, scaler
|
61 |
|
62 |
+
# 🔹 Train Team Performance Model
|
63 |
def train_team_performance_model():
|
64 |
data = match_df[['team1', 'team2', 'winner', 'team1_total', 'team2_total', 'venue', 'city', 'toss_winner', 'toss_decision']].dropna()
|
65 |
data['team1_index'] = data['team1'].astype('category').cat.codes
|
|
|
82 |
|
83 |
return win_model, score_model
|
84 |
|
85 |
+
# 🔹 Train models dynamically
|
86 |
player_score_model, player_scaler = train_player_score_model()
|
87 |
team_win_model, team_score_model = train_team_performance_model()
|