# lol_champion_pick_predictor/util/app_training_df_getter.py
# Author: Jimin Park
# "added new structure" (commit cc07c1e)
from Recent_match_scrapper import get_matches_stats
import os
import pandas as pd
import numpy as np
from Meta_scrapper import *
from helper import merge_stats, process_kda_perfect, ChampionConverter
from Player_scrapper import get_player_stats
from Weekly_meta_scrapper import *
import pandas as pd
import re
# ============================================ my functions =========================================================
def create_champion_features_and_return_df(merged_player_stats=None, meta_stats=None, weekly_meta=None, debug=None, consider_team_comp=True, test_mode=False):
    """
    Build per-champion suitability scores for every player-stats row and
    return the original data with one extra score column per champion.

    Each champion column blends the player's recent (last 20 games), weekly
    (last 7 days), season and mastery performance on that champion with a
    global meta score, then applies tier penalties and (optionally) team-comp
    / counter-pick penalties. Uses pd.concat at the end to avoid DataFrame
    fragmentation.

    Parameters
    ----------
    merged_player_stats : pd.DataFrame, optional
        Merged player stats; loaded from util/data/player_stats_merged.csv
        when None.
    meta_stats : pd.DataFrame, optional
        Per-champion tier/counter table; loaded from disk when None.
    weekly_meta : pd.DataFrame, optional
        Weekly champion meta table (rank/games/pick/ban); loaded from disk
        when None.
    debug : str, optional
        A champion name; when set, per-row scoring diagnostics for that
        champion are collected and printed.
    consider_team_comp : bool
        When True, champions already picked by either team score 0 and
        counter-picks on the enemy team reduce the score.
    test_mode : bool
        When True, only the first 100 rows are processed.

    Returns
    -------
    pd.DataFrame or None
        Original columns plus one column per champion ('champion' moved to
        the front), or None if any error occurred.
    """
    try:
        if merged_player_stats is None:
            print("Loading merged player stats...")
            input_file = os.path.join("util", "data", "player_stats_merged.csv")
            merged_player_stats = pd.read_csv(input_file, low_memory=False)

        # Normalize "Perfect" KDA strings into numeric values.
        merged_player_stats = process_kda_perfect(merged_player_stats)

        if test_mode:
            print("Test mode: Using only first 100 rows")
            merged_player_stats = merged_player_stats.head(100)

        if meta_stats is None:
            print("Loading meta stats...")
            meta_file = os.path.join("util", "data", "meta_stats.csv")
            meta_stats = pd.read_csv(meta_file, low_memory=False)

        if weekly_meta is None:
            print("Loading weekly meta stats...")
            weekly_file = os.path.join("util", "data", "weekly_meta_stats.csv")
            weekly_meta = pd.read_csv(weekly_file, low_memory=False)

        # Initialize output containers.
        debug_data = []
        original_columns = merged_player_stats.columns.tolist()
        feature_dict = {}

        # Copy original columns so the final DataFrame can be rebuilt from
        # plain arrays (avoids fragmentation from repeated column inserts).
        for col in merged_player_stats.columns:
            feature_dict[col] = merged_player_stats[col].values.copy()

        # Champion name list comes from the converter.
        converter = ChampionConverter()
        all_champions = converter.champions

        # Multiplicative penalties for low-tier champions (tier 3/4/5).
        tier_penalties = {3: 0.9, 4: 0.85, 5: 0.8}

        # Single pass over meta_stats builds both lookups (the original code
        # iterated the frame twice).
        tier_map = {}     # champion -> list of tiers seen across roles
        counter_map = {}  # champion -> champions that counter it
        for _, meta_row in meta_stats.iterrows():
            champ = meta_row['champion']
            if pd.notna(meta_row['tier']):
                tier_map.setdefault(champ, []).append(meta_row['tier'])
            if pd.notna(meta_row['counter1']):
                counters = [meta_row['counter1'], meta_row['counter2'], meta_row['counter3']]
                counter_map.setdefault(champ, []).extend(c for c in counters if pd.notna(c))
        # De-duplicate counters.
        for champ in counter_map:
            counter_map[champ] = list(set(counter_map[champ]))

        # Meta score is row-independent, so precompute it once per champion
        # instead of scanning weekly_meta inside the rows x champions loop
        # (the original did a full-frame lookup per cell). First occurrence
        # wins, matching the original .iloc[0] behavior.
        meta_score_map = {}
        for _, weekly_row in weekly_meta.iterrows():
            champ = weekly_row['champion']
            if champ not in meta_score_map:
                meta_score_map[champ] = (
                    1 / weekly_row['rank'] * 0.5 +
                    weekly_row['games'] / 100 * 0.3 +
                    weekly_row['pick'] * 0.1 -
                    weekly_row['ban'] * 0.1
                )

        # NOTE: the original reordered merged_player_stats columns here; that
        # was dead work (all later access is by name and the final column
        # order is rebuilt at the end), so it has been dropped.

        # Importance weights for each score component.
        weights = {
            'recent': 0.3,   # Last 20 games
            'weekly': 0.4,   # Last 7 days
            'meta': 0.2,     # Only from weekly_stats
            'season': 0.06,  # Current season
            'mastery': 0.04  # All-time mastery
        }

        # Process rows in batches so progress can be checkpointed to disk.
        batch_size = 100
        total_rows = len(merged_player_stats)
        print(f"Total rows: {total_rows}")

        for batch_start in range(0, total_rows, batch_size):
            batch_end = min(batch_start + batch_size, total_rows)
            batch_rows = merged_player_stats.iloc[batch_start:batch_end]
            print(f"\nProcessing rows {batch_start} to {batch_end} ({batch_start/total_rows*100:.2f}% complete)")

            # One score array per champion for this batch.
            batch_scores = {champion: np.zeros(len(batch_rows)) for champion in all_champions}

            for batch_idx, (idx, row) in enumerate(batch_rows.iterrows()):
                for champion in all_champions:
                    champion_scores = {
                        'recent_score': 0,
                        'weekly_score': 0,
                        'meta_score': 0,
                        'season_score': 0,
                        'mastery_score': 0
                    }
                    # Debug bookkeeping.
                    base_score_before_penalty = 0
                    counter_penalty = 0
                    counter_debug = []

                    # 1. Recent performance (top 3 most-played champions).
                    for i in range(1, 4):
                        if row.get(f'most_champ_{i}') == champion:
                            wr = float(row[f'WR_{i}']) if pd.notna(row[f'WR_{i}']) else 0
                            kda = float(row[f'KDA_{i}']) if pd.notna(row[f'KDA_{i}']) else 0
                            wins = float(row[f'W_{i}']) if pd.notna(row[f'W_{i}']) else 0
                            losses = float(row[f'L_{i}']) if pd.notna(row[f'L_{i}']) else 0
                            games = wins + losses
                            total_games = float(row['total_games']) if pd.notna(row['total_games']) else 20

                            # Win rate dominates; KDA capped at 10 contributes 30%.
                            performance_quality = (
                                (wr * 0.7) +
                                (min(kda, 10) / 10 * 0.3)
                            )
                            games_factor = min(games / 5, 1.0)
                            games_ratio = games / total_games

                            # Amplify strong / dampen weak records once the
                            # sample is at least 5 games.
                            if games >= 5:
                                if performance_quality < 0.4:
                                    performance_quality *= 0.8
                                elif performance_quality > 0.7:
                                    performance_quality *= 1.2

                            champion_scores['recent_score'] = (
                                performance_quality * (0.7 + (0.3 * games_factor))
                            ) * (1 + games_ratio * 0.2)
                            break  # Exit loop once found

                    # 2. Weekly performance (last 7 days, top 3 champions).
                    for i in range(1, 4):
                        if row.get(f'7d_champ_{i}') == champion:
                            weekly_wins = float(row[f'7d_W_{i}']) if pd.notna(row[f'7d_W_{i}']) else 0
                            weekly_losses = float(row[f'7d_L_{i}']) if pd.notna(row[f'7d_L_{i}']) else 0
                            weekly_games = float(row[f'7d_total_{i}']) if pd.notna(row[f'7d_total_{i}']) else 0
                            weekly_wr = float(row[f'7d_WR_{i}']) if pd.notna(row[f'7d_WR_{i}']) else 0
                            profile_wr = float(row['win_rate']) if pd.notna(row['win_rate']) else 0.5

                            if weekly_games > 0:
                                # Trend of weekly WR vs overall profile WR, clamped to [-1, 1].
                                wr_trend = (weekly_wr - profile_wr) / profile_wr if profile_wr > 0 else 0
                                weekly_intensity = min(weekly_games / 10, 1.0)
                                win_ratio = weekly_wins / weekly_games if weekly_games > 0 else 0

                                weekly_performance = (
                                    (weekly_wr * 0.4) +
                                    (max(min(wr_trend, 1), -1) * 0.2) +
                                    (weekly_intensity * 0.2) +
                                    (win_ratio * 0.2)
                                )
                                if weekly_games >= 5:
                                    if weekly_performance < 0.4:
                                        weekly_performance *= 0.8
                                    elif weekly_performance > 0.7:
                                        weekly_performance *= 1.2

                                champion_scores['weekly_score'] = weekly_performance * (
                                    0.7 + (0.3 * min(weekly_games / 5, 1.0))
                                )
                            break  # Exit loop once found

                    # 3. Meta score (precomputed; 0 when champion absent from weekly_meta).
                    champion_scores['meta_score'] = meta_score_map.get(champion, 0)

                    # 4. Season performance (top 7 season champions).
                    for i in range(1, 8):
                        if row.get(f'season_champ_{i}') == champion:
                            wr = float(row[f'wr_ssn_{i}']) if pd.notna(row[f'wr_ssn_{i}']) else 0
                            games = float(row[f'games_ssn_{i}']) if pd.notna(row[f'games_ssn_{i}']) else 0
                            kda = float(row[f'kda_ssn_{i}']) if pd.notna(row[f'kda_ssn_{i}']) else 0
                            champion_scores['season_score'] = (
                                wr * 0.7 +
                                (kda / 10) * 0.3
                            ) * (games / 100)
                            break  # Exit loop once found

                    # 5. Mastery score (top 16 mastery champions; level out of 7).
                    for i in range(1, 17):
                        if row.get(f'mastery_champ_{i}') == champion:
                            mastery = float(row[f'm_lv_{i}']) if pd.notna(row[f'm_lv_{i}']) else 0
                            champion_scores['mastery_score'] = mastery / 7
                            break  # Exit loop once found

                    # Weighted blend of all components.
                    base_score = (
                        champion_scores['recent_score'] * weights['recent'] +
                        champion_scores['weekly_score'] * weights['weekly'] +
                        champion_scores['meta_score'] * weights['meta'] +
                        champion_scores['season_score'] * weights['season'] +
                        champion_scores['mastery_score'] * weights['mastery']
                    )
                    base_score_before_penalty = base_score

                    # Tier penalty: best (lowest) tier across roles decides.
                    if champion in tier_map:
                        highest_tier = min(tier_map[champion])
                        if highest_tier in tier_penalties:
                            base_score *= tier_penalties[highest_tier]

                    # Team composition and counter-pick penalties.
                    if consider_team_comp:
                        # A champion already on our team cannot be picked.
                        for i in range(1, 5):
                            team_col = f'team_champ{i}'
                            if team_col in row and pd.notna(row[team_col]):
                                if row[team_col] == champion:
                                    base_score = 0
                                    break

                        # Only check opponents if base_score isn't already 0.
                        if base_score != 0:
                            counter_penalty = 0
                            counter_debug = []
                            for i in range(1, 6):
                                opp_col = f'opp_champ{i}'
                                if opp_col in row and pd.notna(row[opp_col]):
                                    opp_champ = row[opp_col]
                                    # Enemy already picked this champion.
                                    if opp_champ == champion:
                                        base_score = 0
                                        break
                                    # 10% penalty per enemy counter-pick.
                                    if champion in counter_map and opp_champ in counter_map[champion]:
                                        counter_penalty += 0.1
                                        counter_debug.append(opp_champ)
                            if counter_penalty > 0:
                                base_score = base_score * (1 - counter_penalty)

                    # Clamp to non-negative and store.
                    batch_scores[champion][batch_idx] = max(base_score, 0)

                    # Collect debug data if this is the debug champion.
                    if debug == champion:
                        counter_list = []
                        for i in range(1, 6):
                            opp_col = f'opp_champ{i}'
                            if opp_col in row and pd.notna(row[opp_col]):
                                if champion in counter_map and row[opp_col] in counter_map[champion]:
                                    counter_list.append(row[opp_col])
                        debug_row = {
                            'champion': row['champion'],
                            'recent_score': champion_scores['recent_score'],
                            'weekly_score': champion_scores['weekly_score'],
                            'meta_score': champion_scores['meta_score'],
                            'base_score': base_score_before_penalty,
                            'final_score': base_score,
                            'counter_penalty': counter_penalty if consider_team_comp else 0,
                            # NOTE(review): assumes a default RangeIndex so that
                            # the label `idx` is also the array position.
                            'final_score_actual': feature_dict[row['champion']][idx] if row['champion'] in feature_dict else base_score,
                            'counter_list_debug': counter_list
                        }
                        debug_data.append(debug_row)

            # Fold this batch's scores into the full-length arrays.
            for champion in batch_scores:
                if champion not in feature_dict:
                    feature_dict[champion] = np.zeros(total_rows)
                feature_dict[champion][batch_start:batch_end] = batch_scores[champion]

            # Checkpoint progress to disk after each batch.
            temp_df = pd.DataFrame({
                **{col: feature_dict[col] for col in original_columns},      # Original columns first
                **{champion: feature_dict[champion] for champion in all_champions}  # Then champion columns
            })
            batch_save_file = os.path.join("util", "data", "feature_eng_stats.csv")
            temp_df.to_csv(batch_save_file, index=False)
            print(f"Saved batch progress to {batch_save_file}")

        if debug:
            # .get avoids a KeyError when the debug champion has no counters.
            print(f"{debug} is countered by: {counter_map.get(debug, [])}")
            debug_df = pd.DataFrame(debug_data)
            print("\nDebug Data:")
            print(debug_df)

        # Assemble the final frame: original data plus new champion columns.
        champion_features = pd.DataFrame(feature_dict)
        features = pd.concat([
            merged_player_stats[original_columns],  # Keep all original columns
            champion_features[[col for col in champion_features.columns if col not in original_columns]]  # Only new champion columns
        ], axis=1)

        # Move the champion column to be the first column.
        if 'champion' in features.columns:
            columns = ['champion'] + [col for col in features.columns if col != 'champion']
            features = features[columns]

        print(f"Saved features in data frame.")
        return features

    except Exception as e:
        # Callers rely on the None-on-failure contract; keep it.
        print(f"\nError occurred: {str(e)}")
        return None
def create_app_user_training_df(url):
    """
    Scrape a player's op.gg profile and build the feature DataFrame used for
    champion-pick prediction.

    Parameters
    ----------
    url : str
        op.gg summoner URL, e.g. "https://www.op.gg/summoners/euw/Name-TAG".

    Returns
    -------
    pd.DataFrame
        Feature-engineered training data for the player.

    Raises
    ------
    Exception
        Wraps any underlying failure with context; the original exception is
        chained via ``from`` so its traceback is preserved.
    """
    try:
        # Input validation.
        if not url or not isinstance(url, str):
            raise ValueError("Invalid URL provided")

        # Extract region and username from the URL path.
        match = re.search(r"/summoners/(\w+)/([\w\-]+)", url)
        if not match:
            raise ValueError(f"Could not parse region and username from URL: {url}\n Type(url): {type(url)}")
        region = match.group(1)
        username = match.group(2)
        print(f"Extracted - Region: {region}, Username: {username}")

        # Get recent stats.
        print("Fetching recent matches...")
        recent_stats = get_matches_stats(region, username)
        if recent_stats is None or recent_stats.empty:
            # Single f-string message: the original passed several positional
            # args to ValueError, which produced a tuple instead of a message.
            raise ValueError(
                f"recent_stats is empty. type(recent_stats): {type(recent_stats)} recent_stats: \n{recent_stats}"
            )

        # Normalize ids rendered as "Name #TAG" to "Name-TAG".
        recent_stats['player_id'] = recent_stats['player_id'].str.replace(" #", "-", regex=False)

        # Get player stats.
        print("Fetching player stats...")
        player_stats = get_player_stats(region, username)

        # Merge recent-match and profile stats.
        print("Merging stats...")
        merged_stats = merge_stats(recent_stats, player_stats)
        if merged_stats is None or merged_stats.empty:
            raise ValueError("Failed to merge stats")

        # Create features.
        print("Creating champion features...")
        training_features = create_champion_features_and_return_df(
            merged_player_stats=merged_stats,
            debug=None,
            consider_team_comp=True,
            test_mode=False
        )

        # Final validation (the feature builder returns None on failure).
        if training_features is None or training_features.empty:
            raise ValueError("Failed to create training features")

        print("Training features created successfully")
        return training_features

    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        print(f"Error in create_app_user_training_df:\n{error_trace}")
        # Chain the cause so the original traceback survives the re-raise.
        raise Exception(f"Failed to create training dataframe: {str(e)}") from e
# ========================================= end of my functions =====================================================
# Example usage (kept commented out; running this module directly would hit the network):
#url = "https://www.op.gg/summoners/euw/Agurin-EUW"
#url = "https://www.op.gg/summoners/euw/Agurin-EUW?queue_type=TOTAL"
#return_value = create_app_user_training_df(url)
#print("type(Return_value):", type(return_value), "\n return value: \n", return_value)