Spaces:
Sleeping
Sleeping
from Recent_match_scrapper import get_matches_stats | |
import os | |
import pandas as pd | |
import numpy as np | |
from Meta_scrapper import * | |
from helper import merge_stats, process_kda_perfect, ChampionConverter | |
from Player_scrapper import get_player_stats | |
from Weekly_meta_scrapper import * | |
import pandas as pd | |
import re | |
# ============================================ my functions ========================================================= | |
def _cell_as_float(row, key, default=0):
    """Return ``row[key]`` as a float, or ``default`` when the cell is NaN/None."""
    value = row[key]
    return float(value) if pd.notna(value) else default


def _recent_score(row, champion):
    """Score ``champion`` from the first ``most_champ_1..3`` slot of ``row`` that names it.

    Returns 0 when the champion is in none of the three slots.
    """
    for slot in range(1, 4):
        if row.get(f'most_champ_{slot}') == champion:
            wr = _cell_as_float(row, f'WR_{slot}')
            kda = _cell_as_float(row, f'KDA_{slot}')
            wins = _cell_as_float(row, f'W_{slot}')
            losses = _cell_as_float(row, f'L_{slot}')
            games = wins + losses
            total_games = _cell_as_float(row, 'total_games', 20)

            performance_quality = (
                (wr * 0.7) +
                (min(kda, 10) / 10 * 0.3)
            )
            games_factor = min(games / 5, 1.0)
            # A recorded total of 0 games used to raise ZeroDivisionError and
            # abort the whole run; treat it as "no share of games" instead.
            games_ratio = games / total_games if total_games > 0 else 0

            # Only amplify/dampen the quality once there is a minimum sample.
            if games >= 5:
                if performance_quality < 0.4:
                    performance_quality *= 0.8
                elif performance_quality > 0.7:
                    performance_quality *= 1.2

            return (
                performance_quality * (0.7 + (0.3 * games_factor))
            ) * (1 + games_ratio * 0.2)
    return 0


def _weekly_score(row, champion):
    """Score ``champion`` from the first ``7d_champ_1..3`` slot of ``row`` that names it.

    Returns 0 when the champion is in no slot or the slot reports no games.
    """
    for slot in range(1, 4):
        if row.get(f'7d_champ_{slot}') == champion:
            weekly_wins = _cell_as_float(row, f'7d_W_{slot}')
            weekly_games = _cell_as_float(row, f'7d_total_{slot}')
            weekly_wr = _cell_as_float(row, f'7d_WR_{slot}')
            profile_wr = _cell_as_float(row, 'win_rate', 0.5)

            if weekly_games > 0:
                # Relative change of the weekly win rate versus the profile win rate.
                wr_trend = (weekly_wr - profile_wr) / profile_wr if profile_wr > 0 else 0
                weekly_intensity = min(weekly_games / 10, 1.0)
                win_ratio = weekly_wins / weekly_games

                weekly_performance = (
                    (weekly_wr * 0.4) +
                    (max(min(wr_trend, 1), -1) * 0.2) +
                    (weekly_intensity * 0.2) +
                    (win_ratio * 0.2)
                )
                if weekly_games >= 5:
                    if weekly_performance < 0.4:
                        weekly_performance *= 0.8
                    elif weekly_performance > 0.7:
                        weekly_performance *= 1.2

                return weekly_performance * (
                    0.7 + (0.3 * min(weekly_games / 5, 1.0))
                )
            return 0
    return 0


def _season_score(row, champion):
    """Score ``champion`` from the first ``season_champ_1..7`` slot of ``row`` that names it."""
    for slot in range(1, 8):
        if row.get(f'season_champ_{slot}') == champion:
            wr = _cell_as_float(row, f'wr_ssn_{slot}')
            games = _cell_as_float(row, f'games_ssn_{slot}')
            kda = _cell_as_float(row, f'kda_ssn_{slot}')
            return (
                wr * 0.7 +
                (kda / 10) * 0.3
            ) * (games / 100)
    return 0


def _mastery_score(row, champion):
    """Score ``champion`` from the first ``mastery_champ_1..16`` slot of ``row`` that names it."""
    for slot in range(1, 17):
        if row.get(f'mastery_champ_{slot}') == champion:
            return _cell_as_float(row, f'm_lv_{slot}') / 7
    return 0


def _meta_scores(weekly_meta, champions):
    """Return ``{champion: meta score}`` for every champion listed in ``weekly_meta``.

    The score depends only on ``weekly_meta`` (columns ``champion``, ``rank``,
    ``games``, ``pick``, ``ban``), never on the player row, so it is computed
    once per champion. When a champion appears in several rows the first row
    is used.
    """
    scores = {}
    champion_column = weekly_meta['champion']
    for champion in champions:
        matches = weekly_meta[champion_column == champion]
        if matches.empty:
            continue
        weekly_row = matches.iloc[0]
        rank = weekly_row['rank']
        games = weekly_row['games']
        pick_rate = weekly_row['pick']
        ban_rate = weekly_row['ban']
        scores[champion] = (
            1 / rank * 0.5 +
            games / 100 * 0.3 +
            pick_rate * 0.1 -
            ban_rate * 0.1
        )
    return scores


def _apply_team_comp(row, champion, base_score, counters):
    """Apply team/opponent rules to ``base_score``; return ``(score, counter_penalty)``.

    The score becomes 0 when ``champion`` already sits in ``team_champ1..4`` or
    ``opp_champ1..5``. Otherwise every opponent found in ``counters`` removes a
    further 10% of the score.
    """
    for slot in range(1, 5):
        team_col = f'team_champ{slot}'
        if team_col in row and pd.notna(row[team_col]) and row[team_col] == champion:
            return 0, 0

    # Nothing to penalise when the score is already zero.
    if base_score == 0:
        return base_score, 0

    counter_penalty = 0
    for slot in range(1, 6):
        opp_col = f'opp_champ{slot}'
        if opp_col in row and pd.notna(row[opp_col]):
            opp_champ = row[opp_col]
            if opp_champ == champion:
                base_score = 0
                break
            if opp_champ in counters:
                counter_penalty += 0.1

    if counter_penalty > 0:
        base_score = base_score * (1 - counter_penalty)
    return base_score, counter_penalty


def create_champion_features_and_return_df(merged_player_stats=None, meta_stats=None, weekly_meta=None, debug=None, consider_team_comp=True, test_mode=False):
    """
    Create features for champion prediction using player data.
    Champion names will be used as column headers.
    Uses pd.concat to avoid DataFrame fragmentation.

    For every input row and every champion known to ``ChampionConverter`` a
    score is built from five weighted parts (recent, weekly, meta, season,
    mastery), scaled by a tier penalty and, optionally, by team-composition
    rules. Negative scores are clamped to 0.

    Args:
        merged_player_stats: DataFrame of player rows; must contain a
            ``champion`` column. When None it is read from
            ``util/data/player_stats_merged.csv``. It is always passed through
            ``process_kda_perfect`` first.
        meta_stats: DataFrame with ``champion``, ``tier`` and ``counter1..3``
            columns. When None it is read from ``util/data/meta_stats.csv``.
        weekly_meta: DataFrame with ``champion``, ``rank``, ``games``, ``pick``
            and ``ban`` columns. When None it is read from
            ``util/data/weekly_meta_stats.csv``.
        debug: Optional champion name; per-row score details for that champion
            are collected and printed.
        consider_team_comp: When True, zero the score of champions already
            picked by either team and penalise champions countered by opponents.
        test_mode: When True, only the first 100 rows are processed.

    Returns:
        DataFrame with ``champion`` first, then the remaining original columns,
        then one score column per champion; or None if any error occurred
        (the error is printed, not raised).

    Side effects:
        Overwrites ``util/data/feature_eng_stats.csv`` with the progress so far
        after every batch of 100 rows and prints progress messages.
    """
    try:
        if merged_player_stats is None:
            print("Loading merged player stats...")
            input_file = os.path.join("util", "data", "player_stats_merged.csv")
            merged_player_stats = pd.read_csv(input_file, low_memory=False)

        # processing kda value
        merged_player_stats = process_kda_perfect(merged_player_stats)

        if test_mode:
            print("Test mode: Using only first 100 rows")
            merged_player_stats = merged_player_stats.head(100)

        if meta_stats is None:
            print("Loading meta stats...")
            meta_file = os.path.join("util", "data", "meta_stats.csv")
            meta_stats = pd.read_csv(meta_file, low_memory=False)

        if weekly_meta is None:
            print("Loading weekly meta stats...")
            weekly_file = os.path.join("util", "data", "weekly_meta_stats.csv")
            weekly_meta = pd.read_csv(weekly_file, low_memory=False)

        # Initialize variables
        debug_data = []
        original_columns = merged_player_stats.columns.tolist()

        # Copy original columns
        feature_dict = {
            col: merged_player_stats[col].values.copy()
            for col in merged_player_stats.columns
        }

        # Initialize the champion converter
        converter = ChampionConverter()
        all_champions = converter.champions

        # Score multiplier keyed by a champion's best (lowest) tier number.
        tier_penalties = {3: 0.9, 4: 0.85, 5: 0.8}

        # Single pass over meta_stats: collect tiers and counters per champion.
        tier_map = {}
        counter_map = {}
        for _, meta_row in meta_stats.iterrows():
            champ = meta_row['champion']
            tier = meta_row['tier']
            if pd.notna(tier):
                tier_map.setdefault(champ, []).append(tier)
            if pd.notna(meta_row['counter1']):
                counters = [meta_row['counter1'], meta_row['counter2'], meta_row['counter3']]
                counter_map.setdefault(champ, []).extend(
                    c for c in counters if pd.notna(c)
                )

        # Ensure unique counters and remove duplicates
        for champ, counters in counter_map.items():
            counter_map[champ] = list(set(counters))

        # Tier multiplier per champion, resolved once instead of per row.
        tier_factor = {}
        for champion in all_champions:
            tiers = tier_map.get(champion)
            if tiers:
                highest_tier = min(tiers)
                if highest_tier in tier_penalties:
                    tier_factor[champion] = tier_penalties[highest_tier]

        # Meta score is independent of the player row: compute once per champion.
        meta_scores = _meta_scores(weekly_meta, all_champions)

        # Move 'champion' column to the first position
        cols = ['champion'] + [col for col in merged_player_stats if col != 'champion']
        merged_player_stats = merged_player_stats[cols]

        # Define importance weights
        weights = {
            'recent': 0.3,    # Last 20 games
            'weekly': 0.4,    # Last 7 days
            'meta': 0.2,      # Only from weekly_stats
            'season': 0.06,   # Current season
            'mastery': 0.04   # All-time mastery
        }

        # Process rows in batches
        batch_size = 100
        total_rows = len(merged_player_stats)
        print(f"Total rows: {total_rows}")

        for batch_start in range(0, total_rows, batch_size):
            batch_end = min(batch_start + batch_size, total_rows)
            batch_rows = merged_player_stats.iloc[batch_start:batch_end]
            print(f"\nProcessing rows {batch_start} to {batch_end} ({batch_start/total_rows*100:.2f}% complete)")

            # Initialize batch scores dictionary
            batch_scores = {champion: np.zeros(len(batch_rows)) for champion in all_champions}

            # Process each row in this batch
            for batch_idx, (_, row) in enumerate(batch_rows.iterrows()):
                # Position of this row in the full frame. Used instead of the
                # index label so a non-default index cannot go out of bounds.
                row_position = batch_start + batch_idx

                for champion in all_champions:
                    champion_scores = {
                        'recent_score': _recent_score(row, champion),
                        'weekly_score': _weekly_score(row, champion),
                        'meta_score': meta_scores.get(champion, 0),
                        'season_score': _season_score(row, champion),
                        'mastery_score': _mastery_score(row, champion),
                    }

                    # Calculate base score for this champion and row
                    base_score = (
                        champion_scores['recent_score'] * weights['recent'] +
                        champion_scores['weekly_score'] * weights['weekly'] +
                        champion_scores['meta_score'] * weights['meta'] +
                        champion_scores['season_score'] * weights['season'] +
                        champion_scores['mastery_score'] * weights['mastery']
                    )

                    # Store the pre-penalty score for debugging
                    base_score_before_penalty = base_score

                    # Apply tier penalties
                    if champion in tier_factor:
                        base_score *= tier_factor[champion]

                    # Process team composition and counter penalties
                    counter_penalty = 0
                    if consider_team_comp:
                        base_score, counter_penalty = _apply_team_comp(
                            row, champion, base_score, counter_map.get(champion, ())
                        )

                    # Store the final score for this champion and row
                    batch_scores[champion][batch_idx] = max(base_score, 0)

                    # Collect debug data if this is the debug champion
                    if debug == champion:
                        champion_counters = counter_map.get(champion, ())
                        counter_list = []
                        for slot in range(1, 6):
                            opp_col = f'opp_champ{slot}'
                            if opp_col in row and pd.notna(row[opp_col]):
                                if row[opp_col] in champion_counters:
                                    counter_list.append(row[opp_col])

                        debug_data.append({
                            'champion': row['champion'],
                            'recent_score': champion_scores['recent_score'],
                            'weekly_score': champion_scores['weekly_score'],
                            'meta_score': champion_scores['meta_score'],
                            'base_score': base_score_before_penalty,
                            'final_score': base_score,
                            'counter_penalty': counter_penalty if consider_team_comp else 0,
                            'final_score_actual': feature_dict[row['champion']][row_position] if row['champion'] in feature_dict else base_score,
                            'counter_list_debug': counter_list
                        })

            # Update feature_dict with batch results
            for champion in batch_scores:
                if champion not in feature_dict:
                    feature_dict[champion] = np.zeros(total_rows)
                feature_dict[champion][batch_start:batch_end] = batch_scores[champion]

            # Save progress after each batch (the same file is overwritten)
            temp_df = pd.DataFrame({
                **{col: feature_dict[col] for col in original_columns},  # Original columns first
                **{champion: feature_dict[champion] for champion in all_champions}  # Then champion columns
            })
            batch_save_file = os.path.join("util", "data", "feature_eng_stats.csv")
            temp_df.to_csv(batch_save_file, index=False)
            print(f"Saved batch progress to {batch_save_file}")

        # Process debug data if any
        if debug:
            # .get: the debug champion may have no counter entry in meta_stats.
            print(f"{debug} is countered by: {counter_map.get(debug, [])}")
            debug_df = pd.DataFrame(debug_data)
            print("\nDebug Data:")
            print(debug_df)

        # New champion columns only. They share the input frame's index so the
        # column-wise concat below lines rows up by position even when the
        # input does not carry a default RangeIndex.
        original_column_set = set(original_columns)
        champion_features = pd.DataFrame(
            {
                col: values
                for col, values in feature_dict.items()
                if col not in original_column_set
            },
            index=merged_player_stats.index,
        )

        # Create the final DataFrame by combining original data with new features
        features = pd.concat([
            merged_player_stats[original_columns],  # Keep all original columns
            champion_features                       # Only new champion columns
        ], axis=1)

        # Move the champion column to be the first column
        if 'champion' in features.columns:
            columns = ['champion'] + [col for col in features.columns if col != 'champion']
            features = features[columns]

        # Print confirmation message
        print("Saved features in data frame.")
        return features

    except Exception as e:
        # Deliberate best-effort boundary: callers test the result for None.
        print(f"\nError occurred: {str(e)}")
        return None
def create_app_user_training_df(url):
    """Build the champion-feature DataFrame for the player named in a profile URL.

    The URL must contain ``/summoners/<region>/<username>`` (for example an
    op.gg summoner page). Recent matches and player stats are fetched for that
    player, merged, and passed to ``create_champion_features_and_return_df``.

    Args:
        url: Profile URL as a non-empty string.

    Returns:
        The non-empty feature DataFrame produced by
        ``create_champion_features_and_return_df``.

    Raises:
        Exception: On any failure (invalid URL, empty scrape result, failed
            merge, failed feature creation). The message is prefixed with
            "Failed to create training dataframe: " and the original error is
            attached as ``__cause__``. The traceback is also printed.
    """
    try:
        # Input validation
        if not url or not isinstance(url, str):
            raise ValueError("Invalid URL provided")

        # Extract region and username
        # NOTE(review): ``[\w\-]+`` stops at '%', so URL-encoded names
        # (e.g. containing %20) are truncated - confirm against real URLs.
        match = re.search(r"/summoners/(\w+)/([\w\-]+)", url)
        if not match:
            raise ValueError(f"Could not parse region and username from URL: {url}\n Type(url): {type(url)}")

        region, username = match.group(1), match.group(2)
        print(f"Extracted - Region: {region}, Username: {username}")

        # Get recent stats
        print("Fetching recent matches...")
        recent_stats = get_matches_stats(region, username)

        # Validate recent_stats
        if recent_stats is None or recent_stats.empty:
            # One formatted message: passing several positional arguments to
            # ValueError made str(e) render as a tuple.
            raise ValueError(
                f"recent_stats is empty. type(recent_stats): {type(recent_stats)}"
                f" recent_stats: \n{recent_stats}"
            )

        # Process player_id: replace the " #" separator with "-"
        recent_stats['player_id'] = recent_stats['player_id'].str.replace(" #", "-", regex=False)

        # Get player stats
        print("Fetching player stats...")
        player_stats = get_player_stats(region, username)

        # Merge stats
        print("Merging stats...")
        merged_stats = merge_stats(recent_stats, player_stats)

        # Validate merged stats
        if merged_stats is None or merged_stats.empty:
            raise ValueError("Failed to merge stats")

        # Create features
        print("Creating champion features...")
        training_features = create_champion_features_and_return_df(
            merged_player_stats=merged_stats,
            debug=None,
            consider_team_comp=True,
            test_mode=False
        )

        # Final validation
        if training_features is None or training_features.empty:
            raise ValueError("Failed to create training features")

        print("Training features created successfully")
        return training_features

    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        print(f"Error in create_app_user_training_df:\n{error_trace}")
        # Chain explicitly so callers can reach the root cause via __cause__.
        raise Exception(f"Failed to create training dataframe: {str(e)}") from e
# ========================================= end of my functions ===================================================== | |
#url = "https://www.op.gg/summoners/euw/Agurin-EUW" | |
#url = "https://www.op.gg/summoners/euw/Agurin-EUW?queue_type=TOTAL" | |
#return_value = create_app_user_training_df(url) | |
#print("type(Return_value):", type(return_value), "\n return value: \n", return_value) | |