sivapriya175 committed on
Commit
9f8cfb6
·
1 Parent(s): e11191e

deploy backend files

Browse files
Files changed (1) hide show
  1. models/train_model.py +22 -4
models/train_model.py CHANGED
@@ -6,7 +6,7 @@ from sklearn.model_selection import train_test_split
6
  from sklearn.metrics import mean_squared_error, accuracy_score, r2_score
7
  from sklearn.preprocessing import StandardScaler
8
 
9
- # Load datasets
10
  ball_df = pd.read_csv('data/cleaned_ball_data.csv')
11
  match_df = pd.read_csv('data/cleaned_match_data.csv')
12
 
@@ -14,7 +14,25 @@ match_df = pd.read_csv('data/cleaned_match_data.csv')
14
  match_df['date'] = pd.to_datetime(match_df['date'], errors='coerce')
15
  ball_df['start_date'] = pd.to_datetime(ball_df['start_date'], errors='coerce')
16
 
17
- # Train Player Score Model (Without Saving .pkl)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def train_player_score_model():
19
  player_runs = ball_df.groupby(['match_id', 'striker'])['runs_off_bat'].sum().reset_index()
20
  player_runs.rename(columns={'runs_off_bat': 'player_total'}, inplace=True)
@@ -41,7 +59,7 @@ def train_player_score_model():
41
 
42
  return model, scaler
43
 
44
- # Train Team Performance Model (Without Saving .pkl)
45
  def train_team_performance_model():
46
  data = match_df[['team1', 'team2', 'winner', 'team1_total', 'team2_total', 'venue', 'city', 'toss_winner', 'toss_decision']].dropna()
47
  data['team1_index'] = data['team1'].astype('category').cat.codes
@@ -64,6 +82,6 @@ def train_team_performance_model():
64
 
65
  return win_model, score_model
66
 
67
- # Train the models dynamically (without .pkl files)
68
  player_score_model, player_scaler = train_player_score_model()
69
  team_win_model, team_score_model = train_team_performance_model()
 
6
  from sklearn.metrics import mean_squared_error, accuracy_score, r2_score
7
  from sklearn.preprocessing import StandardScaler
8
 
9
+ # 🔹 Load datasets
10
  ball_df = pd.read_csv('data/cleaned_ball_data.csv')
11
  match_df = pd.read_csv('data/cleaned_match_data.csv')
12
 
 
14
  match_df['date'] = pd.to_datetime(match_df['date'], errors='coerce')
15
  ball_df['start_date'] = pd.to_datetime(ball_df['start_date'], errors='coerce')
16
 
17
+ # 🔹 Compute team total scores and merge correctly
18
+ team_scores = ball_df.groupby(['match_id', 'batting_team'])['total_runs'].sum().reset_index()
19
+ team_scores.rename(columns={'total_runs': 'team_total'}, inplace=True)
20
+
21
+ # Merge team scores with match_df
22
+ match_df = match_df.merge(team_scores, left_on=['id', 'team1'], right_on=['match_id', 'batting_team'], how='left')
23
+ match_df.rename(columns={'team_total': 'team1_total'}, inplace=True)
24
+
25
+ match_df = match_df.merge(team_scores, left_on=['id', 'team2'], right_on=['match_id', 'batting_team'], how='left')
26
+ match_df.rename(columns={'team_total': 'team2_total'}, inplace=True)
27
+
28
+ # Replace NaN totals (left-merge rows with no matching ball data) with 0 so the later dropna() keeps them
29
+ match_df['team1_total'] = match_df['team1_total'].fillna(0)
30
+ match_df['team2_total'] = match_df['team2_total'].fillna(0)
31
+
32
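+ # Drop merge helper columns (NOTE: the second merge likely suffixes them as *_x/*_y, making this a no-op with errors='ignore' — verify)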
33
+ match_df.drop(columns=['batting_team', 'match_id'], errors='ignore', inplace=True)
34
+
35
+ # 🔹 Train Player Score Model
36
  def train_player_score_model():
37
  player_runs = ball_df.groupby(['match_id', 'striker'])['runs_off_bat'].sum().reset_index()
38
  player_runs.rename(columns={'runs_off_bat': 'player_total'}, inplace=True)
 
59
 
60
  return model, scaler
61
 
62
+ # 🔹 Train Team Performance Model
63
  def train_team_performance_model():
64
  data = match_df[['team1', 'team2', 'winner', 'team1_total', 'team2_total', 'venue', 'city', 'toss_winner', 'toss_decision']].dropna()
65
  data['team1_index'] = data['team1'].astype('category').cat.codes
 
82
 
83
  return win_model, score_model
84
 
85
+ # 🔹 Train models dynamically
86
  player_score_model, player_scaler = train_player_score_model()
87
  team_win_model, team_score_model = train_team_performance_model()