Spaces:
Sleeping
Sleeping
Jimin Park
committed on
Commit
·
c407d91
1
Parent(s):
5cee989
kermitting soon
Browse files- util/app.py +85 -169
util/app.py
CHANGED
@@ -43,13 +43,18 @@ try:
|
|
43 |
except Exception as e:
|
44 |
print(f"Error loading model: {e}")
|
45 |
model = None
|
46 |
-
|
47 |
try:
|
48 |
label_encoder = joblib.load('util/label_encoder.joblib')
|
49 |
print("Label encoder loaded successfully")
|
50 |
except Exception as e:
|
51 |
print(f"Error loading label encoder: {e}")
|
52 |
label_encoder = None
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
#==================================== Functions =================================================
|
55 |
def get_user_training_df(player_opgg_url):
|
@@ -73,102 +78,6 @@ def get_user_training_df(player_opgg_url):
|
|
73 |
|
74 |
#return f"Error getting training data: {e}"
|
75 |
|
76 |
-
def prepare_training_df(df, target_column='champion', stratify_columns=('champion', 'region'),
                        min_samples_per_class=2, train_size=0.6, val_size=0.2, random_state=42):
    """Split ``df`` into stratified train / validation / test sets.

    Rows are stratified on a composite label built by joining the string form
    of every column in ``stratify_columns`` with ``'_'``. Composite labels that
    occur fewer than ``min_samples_per_class`` times are dropped before
    splitting.

    Args:
        df: Input DataFrame; it is copied, never modified in place.
        target_column: Column to predict. It is label-encoded with a freshly
            fitted ``LabelEncoder``.
        stratify_columns: Non-empty sequence of column names used to build the
            composite stratification label. These columns are excluded from
            the feature matrix.
        min_samples_per_class: Minimum number of rows a composite label needs
            in order to be kept.
        train_size: Fraction of the kept rows used for training.
        val_size: Fraction of the kept rows used for validation. The test
            fraction is ``1 - train_size - val_size``.
        random_state: Seed forwarded to both ``train_test_split`` calls.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test, label_encoder)``
        where the ``y_*`` values are the encoded targets and ``label_encoder``
        is the encoder fitted on the kept rows.

    Raises:
        ValueError: If ``stratify_columns`` is empty or the three split
            fractions are not all positive.
    """
    # Normalise to a list so both the tuple default and caller-supplied lists work.
    stratify_columns = list(stratify_columns)
    if not stratify_columns:
        raise ValueError("stratify_columns must name at least one column")

    test_size = 1 - train_size - val_size
    if train_size <= 0 or val_size <= 0 or test_size <= 0:
        raise ValueError(
            "train_size and val_size must be positive and sum to less than 1 "
            f"(got train_size={train_size}, val_size={val_size})"
        )

    working = df.copy()

    # Composite stratification label, e.g. "<champion>_<region>".
    composite = working[stratify_columns[0]].astype(str)
    for col in stratify_columns[1:]:
        composite = composite + '_' + working[col].astype(str)
    working['stratify_label'] = composite

    # Drop combinations that are too rare to stratify on.
    combo_counts = working['stratify_label'].value_counts()
    valid_combos = combo_counts[combo_counts >= min_samples_per_class].index
    filtered = working[working['stratify_label'].isin(valid_combos)].copy()

    # The target is always excluded from the features, even when the caller
    # does not list it in stratify_columns; otherwise it would leak into X.
    excluded = set(stratify_columns) | {target_column, 'stratify_label'}
    feature_columns = [col for col in filtered.columns if col not in excluded]
    X = filtered[feature_columns]
    y = filtered[target_column]

    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # First split: training rows versus a temporary pool (validation + test).
    temp_size = val_size + test_size
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y_encoded,
        test_size=temp_size,
        random_state=random_state,
        stratify=filtered['stratify_label'],
    )

    # Second split: carve the temporary pool into validation and test sets,
    # stratifying on the labels of the rows that landed in the pool.
    # NOTE(review): a composite label that keeps only one row in the pool will
    # presumably make this stratified split fail; min_samples_per_class may
    # need to be larger than 2 for small inputs - confirm against sklearn.
    temp_stratify = filtered.loc[X_temp.index, 'stratify_label']
    val_ratio = val_size / temp_size
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp,
        test_size=(1 - val_ratio),
        random_state=random_state,
        stratify=temp_stratify,
    )

    return X_train, X_val, X_test, y_train, y_val, y_test, label_encoder
|
171 |
-
|
172 |
def show_stats(player_opgg_url):
|
173 |
"""Display player statistics and recent matches"""
|
174 |
if not player_opgg_url:
|
@@ -205,6 +114,82 @@ def show_stats(player_opgg_url):
|
|
205 |
except Exception as e:
|
206 |
return f"Error processing stats: {e}. ", None
|
207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
def predict_champion(player_opgg_url, *champions):
|
209 |
"""Make prediction based on selected champions"""
|
210 |
|
@@ -307,77 +292,7 @@ def predict_champion(player_opgg_url, *champions):
|
|
307 |
import traceback
|
308 |
print(f"Full error trace:\n{traceback.format_exc()}")
|
309 |
return f"Error making prediction: {e}"
|
310 |
-
|
311 |
-
def predict_champion_NOT_IN_USE(player_opgg_url, *champions):
    """Legacy prediction path kept for reference; superseded by predict_champion.

    Fetches the player's data, runs it through ``convert_df``,
    ``apply_feature_engineering`` and ``check_datatypes``, splits it with
    ``prepare_training_df`` and predicts on the validation split only.

    Args:
        player_opgg_url: The player's OP.GG URL; an empty value is rejected.
        *champions: Selected champions. They are only checked for ``None``;
            they are not fed to the model.

    Returns:
        A user-facing string: the decoded predictions, a validation message,
        or ``"Error making prediction: ..."`` when anything raises.
    """
    if not player_opgg_url or None in champions:
        return "Please fill in all fields"

    try:
        if model is None:
            return "Model not loaded properly"

        training_df = get_user_training_df(player_opgg_url)
        # Pass an error string through unchanged, as predict_champion does.
        if isinstance(training_df, str):
            return training_df

        training_df = convert_df(training_df)
        training_df = apply_feature_engineering(training_df)
        check_datatypes(training_df)

        # Only the validation split and the fitted encoder are used below.
        _, X_val, _, _, _, _, label_encoder = prepare_training_df(
            training_df,
            target_column='champion',
            stratify_columns=['champion', 'region'],
            min_samples_per_class=5,
            train_size=0.6,
            val_size=0.2,
            random_state=42,
        )

        # Replace categorical columns by their integer codes, then cast the
        # whole frame to float32 before handing it to DMatrix.
        X_val_processed = X_val.copy()
        for col in X_val_processed.select_dtypes(include=['category']).columns:
            X_val_processed[col] = X_val_processed[col].cat.codes
        X_val_processed = X_val_processed.astype('float32')

        dtest = DMatrix(X_val_processed, enable_categorical=True)
        predictions = model.predict(dtest)

        # Turn the raw model output into integer class indices before
        # decoding: a 2-D output is reduced with argmax, a 1-D output is cast
        # to int. Passing the raw output to inverse_transform is what the
        # previous live line did.
        if len(predictions.shape) > 1:
            pred_indices = predictions.argmax(axis=1)
        else:
            pred_indices = predictions.astype(int)

        # NOTE(review): this encoder was fitted on the player's own rows by
        # prepare_training_df, so its class order may not match the encoding
        # the model was trained with - confirm before reviving this path.
        decoded_preds = label_encoder.inverse_transform(pred_indices)

        return f"Predicted champion: {decoded_preds}"
    except Exception as e:
        return f"Error making prediction: {e}"
|
381 |
|
382 |
# Define your interface
|
383 |
with gr.Blocks() as demo:
|
@@ -414,8 +329,9 @@ with gr.Blocks() as demo:
|
|
414 |
outputs=prediction_output
|
415 |
)
|
416 |
|
|
|
|
|
417 |
# Enable queuing
|
418 |
-
#demo.queue(debug = True)
|
419 |
demo.launch(debug=True)
|
420 |
|
421 |
# For local testing
|
|
|
43 |
except Exception as e:
|
44 |
print(f"Error loading model: {e}")
|
45 |
model = None
|
46 |
+
'''
|
47 |
try:
|
48 |
label_encoder = joblib.load('util/label_encoder.joblib')
|
49 |
print("Label encoder loaded successfully")
|
50 |
except Exception as e:
|
51 |
print(f"Error loading label encoder: {e}")
|
52 |
label_encoder = None
|
53 |
+
'''
|
54 |
+
# Initialize champion name encoder
|
55 |
+
champion_encoder = LabelEncoder()
|
56 |
+
champion_encoder.fit(CHAMPIONS)
|
57 |
+
|
58 |
|
59 |
#==================================== Functions =================================================
|
60 |
def get_user_training_df(player_opgg_url):
|
|
|
78 |
|
79 |
#return f"Error getting training data: {e}"
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
def show_stats(player_opgg_url):
|
82 |
"""Display player statistics and recent matches"""
|
83 |
if not player_opgg_url:
|
|
|
114 |
except Exception as e:
|
115 |
return f"Error processing stats: {e}. ", None
|
116 |
|
117 |
+
def predict_champion(player_opgg_url, *champions):
    """Predict a champion for the player behind ``player_opgg_url``.

    The player's data is fetched with ``get_user_training_df``, passed through
    ``convert_df`` and ``apply_feature_engineering``, converted to a float32
    feature matrix and scored with the module-level ``model``. Only the first
    prediction is decoded and reported.

    Args:
        player_opgg_url: The player's OP.GG URL; an empty value is rejected.
        *champions: Selected champions. They are only checked for ``None``;
            they are not fed to the model.

    Returns:
        A user-facing string: ``"Predicted champion: <name>"`` on success,
        otherwise a validation or error message. This function does not raise;
        every exception is reported in the returned string.
    """
    import traceback

    if not player_opgg_url or None in champions:
        return "Please fill in all fields"

    try:
        if model is None:
            return "Model not loaded properly"

        # Look the encoder up instead of referencing the bare name: when the
        # module-level loader is disabled the name does not exist, and a
        # NameError would be reported as a generic prediction error instead
        # of the intended message below.
        encoder = globals().get('label_encoder')
        if encoder is None:
            return "Label encoder not loaded properly"

        # Get and process the data; a string result is an error message.
        training_df = get_user_training_df(player_opgg_url)
        if isinstance(training_df, str):
            return training_df

        training_df = convert_df(training_df)
        training_df = apply_feature_engineering(training_df)

        # Everything except the label / stratification columns is a feature.
        excluded = {'champion', 'region', 'stratify_label'}
        feature_columns = [col for col in training_df.columns if col not in excluded]
        X_processed = training_df[feature_columns].copy()

        # Replace categorical columns by their integer codes, then cast the
        # whole frame to float32 before handing it to DMatrix.
        for col in X_processed.select_dtypes(include=['category']).columns:
            X_processed[col] = X_processed[col].cat.codes
        X_processed = X_processed.astype('float32')

        dtest = DMatrix(X_processed, enable_categorical=True)
        predictions = model.predict(dtest)

        # A 2-D output is reduced with argmax; a 1-D output is cast to int.
        if len(predictions.shape) > 1:
            pred_indices = predictions.argmax(axis=1)
        else:
            pred_indices = predictions.astype(int)

        # Nothing to decode; previously this surfaced as an index error.
        if len(pred_indices) == 0:
            return "Error making prediction: no rows available to predict on"

        # Only the first prediction is reported.
        decoded_numeric = encoder.inverse_transform(pred_indices)
        raw_label = decoded_numeric[0]

        try:
            champion_id = int(raw_label)
            print(f"Champion ID from model: {champion_id}")

            # NOTE(review): assumes champion IDs roughly correspond to their
            # 1-based position in CHAMPIONS; out-of-range IDs are clamped to
            # the first / last entry rather than rejected - confirm mapping.
            champion_index = min(max(champion_id - 1, 0), len(CHAMPIONS) - 1)
            predicted_champion = CHAMPIONS[champion_index]

            print(f"Mapped to champion: {predicted_champion}")
            return f"Predicted champion: {predicted_champion}"
        except Exception as e:
            print(f"Error mapping champion ID: {e}")
            return f"Error: Could not map champion ID {raw_label}"

    except Exception as e:
        print(f"Full error trace:\n{traceback.format_exc()}")
        return f"Error making prediction: {e}"
|
191 |
+
|
192 |
+
''' current working function!!!!!!
|
193 |
def predict_champion(player_opgg_url, *champions):
|
194 |
"""Make prediction based on selected champions"""
|
195 |
|
|
|
292 |
import traceback
|
293 |
print(f"Full error trace:\n{traceback.format_exc()}")
|
294 |
return f"Error making prediction: {e}"
|
295 |
+
'''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
|
297 |
# Define your interface
|
298 |
with gr.Blocks() as demo:
|
|
|
329 |
outputs=prediction_output
|
330 |
)
|
331 |
|
332 |
+
# Optional: Save the champion encoder for future use
|
333 |
+
joblib.dump(champion_encoder, 'champion_encoder.joblib')
|
334 |
# Enable queuing
|
|
|
335 |
demo.launch(debug=True)
|
336 |
|
337 |
# For local testing
|