Jimin Park committed on
Commit
091ef73
·
1 Parent(s): 5cd1244

kermitting soon

Browse files
Files changed (1) hide show
  1. util/app.py +126 -2
util/app.py CHANGED
@@ -3,6 +3,17 @@ import gradio as gr
3
  import xgboost as xgb
4
  from huggingface_hub import hf_hub_download
5
  from app_training_df_getter import create_app_user_training_df
 
 
 
 
 
 
 
 
 
 
 
6
 
7
 
8
  # Define champion list for dropdowns
@@ -60,6 +71,101 @@ def get_user_training_df(player_opgg_url):
60
 
61
  #return f"Error getting training data: {e}"
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def show_stats(player_opgg_url):
64
  """Display player statistics and recent matches"""
65
  if not player_opgg_url:
@@ -85,7 +191,7 @@ def show_stats(player_opgg_url):
85
 
86
  stats_html = f"""
87
  <div style='padding: 20px; background: #f5f5f5; border-radius: 10px;'>
88
- <h3>Player Stats</h3>
89
  <p>Wins: {wins} | Losses: {losses}</p>
90
  <p>Winrate: {winrate}</p>
91
  <p>Favorite Champions: {', '.join(favorite_champions)}</p>
@@ -110,14 +216,32 @@ def predict_champion(player_opgg_url, *champions):
110
 
111
  print("============= Inside predict_champion(): Model loaded properly=================\n")
112
 
113
- features = get_user_training_df(player_opgg_url)
114
 
115
  print("============= Inside predict_champion(): =================\n")
116
  print("features type: ", type(features), "\n features: \n", features, "\n")
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  print("Starting model prediction... \n")
119
  prediction = model.predict(features)
120
  predicted_champion = CHAMPIONS[prediction[0]]
 
121
  return f"Predicted champion: {predicted_champion}"
122
  except Exception as e:
123
  return f"Error making prediction: {e}"
 
3
  import xgboost as xgb
4
  from huggingface_hub import hf_hub_download
5
  from app_training_df_getter import create_app_user_training_df
6
+ import pandas as pd
7
+ import numpy as np
8
+ from collections import Counter
9
+ import os
10
+ from sklearn.model_selection import train_test_split, GridSearchCV
11
+ from sklearn.preprocessing import LabelEncoder
12
+ from sklearn.metrics import classification_report, precision_score, recall_score, f1_score
13
+ from xgboost import XGBClassifier, plot_importance
14
+ import xgboost as xgb
15
+ from helper import *
16
+ import helper as helpfun
17
 
18
 
19
  # Define champion list for dropdowns
 
71
 
72
  #return f"Error getting training data: {e}"
73
 
74
def _restore_categoricals(frame, categorical_columns, category_mappings):
    """Return a copy of *frame* whose integer-coded columns are categoricals again."""
    # Copy first: the frames handed back by the splitter may be views of the
    # encoded frame, and writing into them triggers chained-assignment issues.
    frame = frame.copy()
    for col in categorical_columns:
        if col in frame.columns:
            frame[col] = pd.Categorical.from_codes(
                frame[col],
                categories=category_mappings[col]['categories'],
                ordered=category_mappings[col]['ordered'],
            )
    return frame


def prepare_training_df(df, target_column='champion', stratify_columns=('champion', 'region'),
                        min_samples_per_class=6, train_size=0, val_size=1, random_state=42):
    """Split *df* into stratified train / validation / test feature sets.

    Rows are grouped by a composite label built from ``stratify_columns``;
    groups with fewer than ``min_samples_per_class`` rows are dropped. The
    remaining rows are split with ``train_test_split`` using that composite
    label for stratification. Categorical feature columns are converted to
    integer codes for the split and restored to categoricals afterwards.

    Args:
        df: Input DataFrame. It is copied, never modified.
        target_column: Column holding the label to predict. It is label-encoded
            and always excluded from the returned feature frames.
        stratify_columns: Columns combined into the stratification label. They
            are excluded from the returned feature frames.
        min_samples_per_class: Minimum number of rows a stratification group
            needs in order to be kept.
        train_size: Fraction of the kept rows used for training (0..1).
        val_size: Fraction of the kept rows used for validation (0..1).
            The test fraction is ``1 - train_size - val_size``.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test, label_encoder)``.
        A partition whose fraction is zero is returned empty; in that case the
        rows of the neighbouring partition are passed through without
        shuffling, because there is nothing to split.

    Raises:
        ValueError: If ``stratify_columns`` is empty, the fractions are
            negative or sum to more than 1, or no stratification group has
            enough rows.
    """
    stratify_columns = list(stratify_columns)
    if not stratify_columns:
        raise ValueError("stratify_columns must name at least one column")

    # Fractions such as 0.8 + 0.2 do not sum to exactly 1.0 in floating point,
    # so "no test set" has to be detected with a tolerance.
    eps = 1e-9
    test_size = 1 - train_size - val_size
    if train_size < 0 or val_size < 0 or test_size < -eps:
        raise ValueError(
            "train_size and val_size must be non-negative fractions whose sum "
            f"does not exceed 1 (got train_size={train_size}, val_size={val_size})"
        )

    df = df.copy()

    # Create composite stratification label
    df['stratify_label'] = df[stratify_columns[0]].astype(str)
    for col in stratify_columns[1:]:
        df['stratify_label'] += '_' + df[col].astype(str)

    # Handle categorical columns - store category mappings so that the codes
    # used during splitting can be turned back into categoricals.
    categorical_columns = [
        col for col in df.select_dtypes(include=['category']).columns
        if col != target_column
    ]
    category_mappings = {}
    encoded_df = df.copy()
    for col in categorical_columns:
        category_mappings[col] = {
            'categories': df[col].cat.categories,
            'ordered': df[col].cat.ordered,
        }
        encoded_df[col] = df[col].cat.codes

    # Remove combinations with too few samples
    combo_counts = df['stratify_label'].value_counts()
    valid_combos = combo_counts[combo_counts >= min_samples_per_class].index
    print(type(valid_combos))

    mask = df['stratify_label'].isin(valid_combos)
    if not mask.any():
        raise ValueError(
            "No stratification group has at least "
            f"{min_samples_per_class} samples; nothing left to split"
        )
    # A positional boolean array keeps the filtering correct even when the
    # DataFrame index contains duplicate labels.
    keep = mask.to_numpy()
    stratify_labels = df.loc[keep, 'stratify_label']

    # Prepare features and target. The target is excluded explicitly so it
    # cannot leak into the features when it is not one of stratify_columns.
    excluded = set(stratify_columns) | {'stratify_label', target_column}
    feature_columns = [col for col in df.columns if col not in excluded]
    X = encoded_df.loc[keep, feature_columns]  # Use encoded version for splitting
    y = df.loc[keep, target_column]

    # Encode target values
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    temp_size = val_size + test_size

    # First split: training and temporary (validation + test) sets.
    # train_test_split rejects an empty side, so zero-sized partitions are
    # produced directly instead of being requested from it.
    if train_size == 0:
        X_train, y_train = X.iloc[:0], y_encoded[:0]
        X_temp, y_temp, temp_stratify = X, y_encoded, stratify_labels
    elif temp_size <= eps:
        X_train, y_train = X, y_encoded
        X_temp, y_temp, temp_stratify = X.iloc[:0], y_encoded[:0], stratify_labels.iloc[:0]
    else:
        # Splitting the labels together with X and y yields the stratify
        # labels of the temp set without an index lookup.
        X_train, X_temp, y_train, y_temp, _, temp_stratify = train_test_split(
            X, y_encoded, stratify_labels,
            test_size=temp_size,
            random_state=random_state,
            stratify=stratify_labels,
        )

    # Second split: validation and test sets
    if len(X_temp) == 0 or test_size <= eps:
        X_val, y_val = X_temp, y_temp
        X_test, y_test = X_temp.iloc[:0], y_temp[:0]
    elif val_size == 0:
        X_val, y_val = X_temp.iloc[:0], y_temp[:0]
        X_test, y_test = X_temp, y_temp
    else:
        val_ratio = val_size / (val_size + test_size)
        X_val, X_test, y_val, y_test = train_test_split(
            X_temp, y_temp,
            test_size=(1 - val_ratio),
            random_state=random_state,
            stratify=temp_stratify,
        )

    print("X_val: ", X_val, "\n X_val type: ", type(X_val), "\n y_val: ", y_val, "\n y_val type: ", type(y_val))

    # Restore categorical dtypes
    X_train = _restore_categoricals(X_train, categorical_columns, category_mappings)
    X_val = _restore_categoricals(X_val, categorical_columns, category_mappings)
    X_test = _restore_categoricals(X_test, categorical_columns, category_mappings)

    return X_train, X_val, X_test, y_train, y_val, y_test, label_encoder
168
+
169
  def show_stats(player_opgg_url):
170
  """Display player statistics and recent matches"""
171
  if not player_opgg_url:
 
191
 
192
  stats_html = f"""
193
  <div style='padding: 20px; background: #f5f5f5; border-radius: 10px;'>
194
+ <h3>Player's Recent Stats</h3>
195
  <p>Wins: {wins} | Losses: {losses}</p>
196
  <p>Winrate: {winrate}</p>
197
  <p>Favorite Champions: {', '.join(favorite_champions)}</p>
 
216
 
217
  print("============= Inside predict_champion(): Model loaded properly=================\n")
218
 
219
+ training_df = get_user_training_df(player_opgg_url)
220
 
221
  print("============= Inside predict_champion(): =================\n")
222
  print("features type: ", type(features), "\n features: \n", features, "\n")
223
 
224
+ print("=============== Inside predict_champion =================== \n")
225
+
226
+ training_df = convert_df(training_df)
227
+ features = apply_feature_engineering(training_df)
228
+ check_datatypes(training_df)
229
+
230
+ X_train, X_val, X_test, y_train, y_val, y_test, label_encoder = prepare_training_df(
231
+ training_df,
232
+ target_column='champion',
233
+ stratify_columns=['champion', 'region'],
234
+ min_samples_per_class=5,
235
+ train_size=0.6,
236
+ val_size=0.2,
237
+ random_state=42
238
+ )
239
+ print("type(X_test): ", type(X_test), "\n")
240
+
241
  print("Starting model prediction... \n")
242
  prediction = model.predict(features)
243
  predicted_champion = CHAMPIONS[prediction[0]]
244
+
245
  return f"Predicted champion: {predicted_champion}"
246
  except Exception as e:
247
  return f"Error making prediction: {e}"