Spaces:
Sleeping
Sleeping
Jimin Park
committed on
Commit
·
091ef73
1
Parent(s):
5cd1244
kermitting soon
Browse files- util/app.py +126 -2
util/app.py
CHANGED
@@ -3,6 +3,17 @@ import gradio as gr
|
|
3 |
import xgboost as xgb
|
4 |
from huggingface_hub import hf_hub_download
|
5 |
from app_training_df_getter import create_app_user_training_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
|
8 |
# Define champion list for dropdowns
|
@@ -60,6 +71,101 @@ def get_user_training_df(player_opgg_url):
|
|
60 |
|
61 |
#return f"Error getting training data: {e}"
|
62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
def show_stats(player_opgg_url):
|
64 |
"""Display player statistics and recent matches"""
|
65 |
if not player_opgg_url:
|
@@ -85,7 +191,7 @@ def show_stats(player_opgg_url):
|
|
85 |
|
86 |
stats_html = f"""
|
87 |
<div style='padding: 20px; background: #f5f5f5; border-radius: 10px;'>
|
88 |
-
<h3>Player Stats</h3>
|
89 |
<p>Wins: {wins} | Losses: {losses}</p>
|
90 |
<p>Winrate: {winrate}</p>
|
91 |
<p>Favorite Champions: {', '.join(favorite_champions)}</p>
|
@@ -110,14 +216,32 @@ def predict_champion(player_opgg_url, *champions):
|
|
110 |
|
111 |
print("============= Inside predict_champion(): Model loaded properly=================\n")
|
112 |
|
113 |
-
|
114 |
|
115 |
print("============= Inside predict_champion(): =================\n")
|
116 |
print("features type: ", type(features), "\n features: \n", features, "\n")
|
117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
print("Starting model prediction... \n")
|
119 |
prediction = model.predict(features)
|
120 |
predicted_champion = CHAMPIONS[prediction[0]]
|
|
|
121 |
return f"Predicted champion: {predicted_champion}"
|
122 |
except Exception as e:
|
123 |
return f"Error making prediction: {e}"
|
|
|
3 |
import xgboost as xgb
|
4 |
from huggingface_hub import hf_hub_download
|
5 |
from app_training_df_getter import create_app_user_training_df
|
6 |
+
import pandas as pd
|
7 |
+
import numpy as np
|
8 |
+
from collections import Counter
|
9 |
+
import os
|
10 |
+
from sklearn.model_selection import train_test_split, GridSearchCV
|
11 |
+
from sklearn.preprocessing import LabelEncoder
|
12 |
+
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score
|
13 |
+
from xgboost import XGBClassifier, plot_importance
|
14 |
+
import xgboost as xgb
|
15 |
+
from helper import *
|
16 |
+
import helper as helpfun
|
17 |
|
18 |
|
19 |
# Define champion list for dropdowns
|
|
|
71 |
|
72 |
#return f"Error getting training data: {e}"
|
73 |
|
74 |
+
def prepare_training_df(df, target_column='champion', stratify_columns=('champion', 'region'),
                        min_samples_per_class=6, train_size=0, val_size=1, random_state=42):
    """Split ``df`` into stratified train / validation / test sets.

    Rows are stratified on a composite label built by joining the string
    values of ``stratify_columns`` with ``'_'``. Composite labels that occur
    fewer than ``min_samples_per_class`` times are dropped before splitting.
    Columns with pandas ``category`` dtype are converted to integer codes for
    the split and converted back to categoricals (same categories and
    ordering) in the returned feature frames.

    Args:
        df: Input DataFrame. It is copied; the caller's frame is not modified.
        target_column: Name of the column to predict. It is label-encoded and
            never included in the returned feature frames.
        stratify_columns: Sequence of column names combined into the
            stratification label. These columns are excluded from the features.
        min_samples_per_class: Minimum number of rows a composite label needs
            in order to be kept.
        train_size: Fraction of the filtered rows used for training.
        val_size: Fraction of the filtered rows used for validation. The
            remainder ``1 - train_size - val_size`` becomes the test set.
        random_state: Seed passed to both ``train_test_split`` calls.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test, label_encoder)``
        where the ``y_*`` values are the label-encoded targets and
        ``label_encoder`` is the fitted ``LabelEncoder``.

    Raises:
        ValueError: If the fractions do not leave a non-empty train,
            validation and test share, or if no rows survive the
            ``min_samples_per_class`` filter.
    """
    # NOTE(review): the signature defaults (train_size=0, val_size=1) leave
    # nothing for the train/test sets and were always rejected by
    # scikit-learn; callers must pass explicit fractions. Fail early with a
    # readable message instead of an error from deep inside train_test_split.
    test_size = 1 - train_size - val_size
    if train_size <= 0 or val_size <= 0 or test_size <= 0:
        raise ValueError(
            "train_size and val_size must be positive fractions whose sum is "
            f"below 1 (got train_size={train_size}, val_size={val_size})"
        )

    # Accept any sequence (tuple default avoids a shared mutable default).
    stratify_columns = list(stratify_columns)
    df = df.copy()

    # Create composite stratification label
    df['stratify_label'] = df[stratify_columns[0]].astype(str)
    for col in stratify_columns[1:]:
        df['stratify_label'] += '_' + df[col].astype(str)

    # Handle categorical columns - store category mappings
    categorical_columns = [
        col for col in df.select_dtypes(include=['category']).columns
        if col != target_column
    ]

    # Convert categorical columns to codes but keep original data in ``df``
    category_mappings = {}
    temp_encoded_df = df.copy()
    for col in categorical_columns:
        category_mappings[col] = {
            'categories': df[col].cat.categories,
            'ordered': df[col].cat.ordered
        }
        temp_encoded_df[col] = df[col].cat.codes

    # Remove combinations with too few samples
    combo_counts = df['stratify_label'].value_counts()
    valid_combos = combo_counts[combo_counts >= min_samples_per_class].index
    print(type(valid_combos))

    # Filter data
    mask = df['stratify_label'].isin(valid_combos)
    df_filtered = df[mask].copy()
    temp_encoded_filtered = temp_encoded_df[mask].copy()
    if df_filtered.empty:
        raise ValueError(
            "No rows left after removing stratification groups with fewer "
            f"than {min_samples_per_class} samples"
        )

    # Prepare features and target. The target is always excluded so it cannot
    # leak into the features when it is not one of the stratify columns.
    excluded = set(stratify_columns) | {'stratify_label', target_column}
    feature_columns = [col for col in df_filtered.columns if col not in excluded]
    X = temp_encoded_filtered[feature_columns]  # Use encoded version for splitting
    y = df_filtered[target_column]

    # Encode target values
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # First split: training and temporary (validation + test) sets
    temp_size = val_size + test_size
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y_encoded,
        test_size=temp_size,
        random_state=random_state,
        stratify=df_filtered['stratify_label']
    )

    # Get corresponding stratify labels for temp set
    temp_stratify = df_filtered.loc[X_temp.index, 'stratify_label']

    # Second split: validation and test sets
    val_ratio = val_size / (val_size + test_size)
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp,
        test_size=(1 - val_ratio),
        random_state=random_state,
        stratify=temp_stratify
    )

    print("X_val: ", X_val, "\n X_val type: ", type(X_val), "\n y_val: ", y_val, "\n y_val type: ", type(y_val))

    # Work on independent copies so the column assignments below never write
    # into a slice of ``X`` (avoids pandas chained-assignment warnings).
    X_train, X_val, X_test = X_train.copy(), X_val.copy(), X_test.copy()

    # Restore categorical dtypes
    for col in categorical_columns:
        if col not in X_train.columns:
            # e.g. a categorical stratify column that is not a feature
            continue
        mapping = category_mappings[col]
        for frame in (X_train, X_val, X_test):
            frame[col] = pd.Categorical.from_codes(
                frame[col],
                categories=mapping['categories'],
                ordered=mapping['ordered']
            )

    return X_train, X_val, X_test, y_train, y_val, y_test, label_encoder
|
168 |
+
|
169 |
def show_stats(player_opgg_url):
|
170 |
"""Display player statistics and recent matches"""
|
171 |
if not player_opgg_url:
|
|
|
191 |
|
192 |
stats_html = f"""
|
193 |
<div style='padding: 20px; background: #f5f5f5; border-radius: 10px;'>
|
194 |
+
<h3>Player's Recent Stats</h3>
|
195 |
<p>Wins: {wins} | Losses: {losses}</p>
|
196 |
<p>Winrate: {winrate}</p>
|
197 |
<p>Favorite Champions: {', '.join(favorite_champions)}</p>
|
|
|
216 |
|
217 |
print("============= Inside predict_champion(): Model loaded properly=================\n")
|
218 |
|
219 |
+
training_df = get_user_training_df(player_opgg_url)
|
220 |
|
221 |
print("============= Inside predict_champion(): =================\n")
|
222 |
print("features type: ", type(features), "\n features: \n", features, "\n")
|
223 |
|
224 |
+
print("=============== Inside predict_champion =================== \n")
|
225 |
+
|
226 |
+
training_df = convert_df(training_df)
|
227 |
+
features = apply_feature_engineering(training_df)
|
228 |
+
check_datatypes(training_df)
|
229 |
+
|
230 |
+
X_train, X_val, X_test, y_train, y_val, y_test, label_encoder = prepare_training_df(
|
231 |
+
training_df,
|
232 |
+
target_column='champion',
|
233 |
+
stratify_columns=['champion', 'region'],
|
234 |
+
min_samples_per_class=5,
|
235 |
+
train_size=0.6,
|
236 |
+
val_size=0.2,
|
237 |
+
random_state=42
|
238 |
+
)
|
239 |
+
print("type(X_test): ", type(X_test), "\n")
|
240 |
+
|
241 |
print("Starting model prediction... \n")
|
242 |
prediction = model.predict(features)
|
243 |
predicted_champion = CHAMPIONS[prediction[0]]
|
244 |
+
|
245 |
return f"Predicted champion: {predicted_champion}"
|
246 |
except Exception as e:
|
247 |
return f"Error making prediction: {e}"
|