from Recent_match_scrapper import get_matches_stats
import os
import re
import pandas as pd
import numpy as np
from Meta_scrapper import *
from helper import merge_stats, process_kda_perfect, ChampionConverter
from Player_scrapper import get_player_stats
from Weekly_meta_scrapper import *

# ============================================ my functions =========================================================

def create_champion_features_and_return_df(merged_player_stats=None, meta_stats=None, weekly_meta=None,
                                           debug=None, consider_team_comp=True, test_mode=False):
    """
    Create features for champion prediction using player data.
    Champion names are used as column headers.
    Uses pd.concat to avoid DataFrame fragmentation.
    """
    try:
        if merged_player_stats is None:
            print("Loading merged player stats...")
            input_file = os.path.join("util", "data", "player_stats_merged.csv")
            merged_player_stats = pd.read_csv(input_file, low_memory=False)

        # Process KDA values into numeric form
        merged_player_stats = process_kda_perfect(merged_player_stats)

        if test_mode:
            print("Test mode: Using only first 100 rows")
            merged_player_stats = merged_player_stats.head(100)

        if meta_stats is None:
            print("Loading meta stats...")
            meta_file = os.path.join("util", "data", "meta_stats.csv")
            meta_stats = pd.read_csv(meta_file, low_memory=False)

        if weekly_meta is None:
            print("Loading weekly meta stats...")
            weekly_file = os.path.join("util", "data", "weekly_meta_stats.csv")
            weekly_meta = pd.read_csv(weekly_file, low_memory=False)

        # Initialize variables
        debug_data = []
        original_columns = merged_player_stats.columns.tolist()
        feature_dict = {}

        # Copy original columns
        for col in merged_player_stats.columns:
            feature_dict[col] = merged_player_stats[col].values.copy()

        # Initialize the champion converter
        converter = ChampionConverter()
        all_champions = converter.champions
        #total_champions = len(converter.champions)

        # Get low-tier champions and counter information.
        # Only tiers 3-5 are penalized; tiers 1-2 carry no penalty.
        tier_penalties = {3: 0.9, 4: 0.85, 5: 0.8}

        # Build tier_map as a dictionary of lists (a champion can hold one tier per role)
        tier_map = {}
        for _, row in meta_stats.iterrows():
            champ = row['champion']
            tier = row['tier']
            if pd.notna(tier):
                if champ in tier_map:
                    tier_map[champ].append(tier)
                else:
                    tier_map[champ] = [tier]

        # Build counter_map: champion -> champions that counter it
        counter_map = {}
        for _, row in meta_stats.iterrows():
            if pd.notna(row['counter1']):
                champ = row['champion']
                counters = [row['counter1'], row['counter2'], row['counter3']]
                if champ in counter_map:
                    counter_map[champ].extend([c for c in counters if pd.notna(c)])
                else:
                    counter_map[champ] = [c for c in counters if pd.notna(c)]

        # Ensure counters are unique
        for champ, counters in counter_map.items():
            counter_map[champ] = list(set(counters))

        # Move 'champion' column to the first position
        cols = ['champion'] + [col for col in merged_player_stats.columns if col != 'champion']
        merged_player_stats = merged_player_stats[cols]

        # Define importance weights
        weights = {
            'recent': 0.3,   # Last 20 games
            'weekly': 0.4,   # Last 7 days
            'meta': 0.2,     # Only from weekly_stats
            'season': 0.06,  # Current season
            'mastery': 0.04  # All-time mastery
        }
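        # Worked example of the weighting (illustrative numbers, not real data):
        # with sub-scores recent=0.8, weekly=0.6, meta=0.5, season=0.4, mastery=1.0,
        # the base score below comes out to
        #   0.8*0.3 + 0.6*0.4 + 0.5*0.2 + 0.4*0.06 + 1.0*0.04 = 0.644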
        # Process rows in batches
        batch_size = 100
        total_rows = len(merged_player_stats)
        print(f"Total rows: {total_rows}")

        for batch_start in range(0, total_rows, batch_size):
            batch_end = min(batch_start + batch_size, total_rows)
            batch_rows = merged_player_stats.iloc[batch_start:batch_end]
            print(f"\nProcessing rows {batch_start} to {batch_end} ({batch_start / total_rows * 100:.2f}% complete)")

            # Initialize batch scores dictionary
            batch_scores = {champion: np.zeros(len(batch_rows)) for champion in all_champions}

            # Process each row in this batch
            for batch_idx, (idx, row) in enumerate(batch_rows.iterrows()):
                # Process each champion for this row
                for champion in all_champions:
                    # Initialize scores for this champion and row
                    champion_scores = {
                        'recent_score': 0,
                        'weekly_score': 0,
                        'meta_score': 0,
                        'season_score': 0,
                        'mastery_score': 0
                    }

                    # Debug bookkeeping
                    base_score_before_penalty = 0
                    counter_penalty = 0
                    counter_debug = []

                    # 1. Recent performance (top 3 most-played champions)
                    for i in range(1, 4):
                        if row.get(f'most_champ_{i}') == champion:
                            wr = float(row[f'WR_{i}']) if pd.notna(row[f'WR_{i}']) else 0
                            kda = float(row[f'KDA_{i}']) if pd.notna(row[f'KDA_{i}']) else 0
                            wins = float(row[f'W_{i}']) if pd.notna(row[f'W_{i}']) else 0
                            losses = float(row[f'L_{i}']) if pd.notna(row[f'L_{i}']) else 0
                            games = wins + losses
                            total_games = float(row['total_games']) if pd.notna(row['total_games']) else 20

                            # Blend win rate with a capped KDA, then reward play volume
                            performance_quality = (wr * 0.7) + (min(kda, 10) / 10 * 0.3)
                            games_factor = min(games / 5, 1.0)
                            games_ratio = games / total_games

                            # With enough games, amplify clearly good or bad results
                            if games >= 5:
                                if performance_quality < 0.4:
                                    performance_quality *= 0.8
                                elif performance_quality > 0.7:
                                    performance_quality *= 1.2

                            champion_scores['recent_score'] = (
                                performance_quality * (0.7 + (0.3 * games_factor))
                            ) * (1 + games_ratio * 0.2)
                            break  # Exit loop once found

                    # 2. Weekly performance (top 3 champions of the last 7 days)
                    for i in range(1, 4):
                        if row.get(f'7d_champ_{i}') == champion:
                            weekly_wins = float(row[f'7d_W_{i}']) if pd.notna(row[f'7d_W_{i}']) else 0
                            weekly_losses = float(row[f'7d_L_{i}']) if pd.notna(row[f'7d_L_{i}']) else 0  # parsed for completeness; unused below
                            weekly_games = float(row[f'7d_total_{i}']) if pd.notna(row[f'7d_total_{i}']) else 0
                            weekly_wr = float(row[f'7d_WR_{i}']) if pd.notna(row[f'7d_WR_{i}']) else 0
                            profile_wr = float(row['win_rate']) if pd.notna(row['win_rate']) else 0.5

                            if weekly_games > 0:
                                wr_trend = (weekly_wr - profile_wr) / profile_wr if profile_wr > 0 else 0
                                weekly_intensity = min(weekly_games / 10, 1.0)
                                win_ratio = weekly_wins / weekly_games if weekly_games > 0 else 0

                                weekly_performance = (
                                    (weekly_wr * 0.4) +
                                    (max(min(wr_trend, 1), -1) * 0.2) +
                                    (weekly_intensity * 0.2) +
                                    (win_ratio * 0.2)
                                )

                                if weekly_games >= 5:
                                    if weekly_performance < 0.4:
                                        weekly_performance *= 0.8
                                    elif weekly_performance > 0.7:
                                        weekly_performance *= 1.2

                                champion_scores['weekly_score'] = weekly_performance * (
                                    0.7 + (0.3 * min(weekly_games / 5, 1.0))
                                )
                            break  # Exit loop once found

                    # 3. Meta score from the weekly meta table
                    if champion in weekly_meta['champion'].values:
                        weekly_row = weekly_meta[weekly_meta['champion'] == champion].iloc[0]
                        rank = weekly_row['rank']
                        games = weekly_row['games']
                        pick_rate = weekly_row['pick']
                        ban_rate = weekly_row['ban']

                        weight = (
                            1 / rank * 0.5 +
                            games / 100 * 0.3 +
                            pick_rate * 0.1 -
                            ban_rate * 0.1
                        )
                        champion_scores['meta_score'] = weight
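                    # Illustrative meta-score arithmetic (hypothetical numbers): a
                    # champion at rank 2 with 80 games, 0.15 pick rate and 0.05 ban
                    # rate gets 1/2*0.5 + 80/100*0.3 + 0.15*0.1 - 0.05*0.1 = 0.50.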
                    # 4. Season performance (top 7 season champions)
                    for i in range(1, 8):
                        if row.get(f'season_champ_{i}') == champion:
                            wr = float(row[f'wr_ssn_{i}']) if pd.notna(row[f'wr_ssn_{i}']) else 0
                            games = float(row[f'games_ssn_{i}']) if pd.notna(row[f'games_ssn_{i}']) else 0
                            kda = float(row[f'kda_ssn_{i}']) if pd.notna(row[f'kda_ssn_{i}']) else 0

                            champion_scores['season_score'] = (
                                wr * 0.7 + (kda / 10) * 0.3
                            ) * (games / 100)
                            break  # Exit loop once found

                    # 5. Mastery score (top 16 mastery champions, level normalized by 7)
                    for i in range(1, 17):
                        if row.get(f'mastery_champ_{i}') == champion:
                            mastery = float(row[f'm_lv_{i}']) if pd.notna(row[f'm_lv_{i}']) else 0
                            champion_scores['mastery_score'] = mastery / 7
                            break  # Exit loop once found

                    # Calculate base score for this champion and row
                    base_score = (
                        champion_scores['recent_score'] * weights['recent'] +
                        champion_scores['weekly_score'] * weights['weekly'] +
                        champion_scores['meta_score'] * weights['meta'] +
                        champion_scores['season_score'] * weights['season'] +
                        champion_scores['mastery_score'] * weights['mastery']
                    )

                    # Store the pre-penalty score for debugging
                    base_score_before_penalty = base_score

                    # Apply tier penalties (min = best tier across roles; only tiers 3-5 are penalized)
                    if champion in tier_map:
                        highest_tier = min(tier_map[champion])
                        if highest_tier in tier_penalties:
                            base_score *= tier_penalties[highest_tier]

                    # Process team composition and counter penalties
                    if consider_team_comp:
                        # A champion already picked by the team cannot be recommended
                        for i in range(1, 5):
                            team_col = f'team_champ{i}'
                            if team_col in row and pd.notna(row[team_col]):
                                if row[team_col] == champion:
                                    base_score = 0
                                    break

                        # Only check opponents if base_score isn't already 0
                        if base_score != 0:
                            counter_penalty = 0
                            counter_debug = []  # For debug information
                            for i in range(1, 6):
                                opp_col = f'opp_champ{i}'
                                if opp_col in row and pd.notna(row[opp_col]):
                                    opp_champ = row[opp_col]
                                    if opp_champ == champion:
                                        base_score = 0
                                        break
                                    # Each opposing counter pick costs 10% of the score
                                    if champion in counter_map and opp_champ in counter_map[champion]:
                                        counter_penalty += 0.1
                                        counter_debug.append(opp_champ)

                            if counter_penalty > 0:
                                base_score = base_score * (1 - counter_penalty)

                    # Store the final score for this champion and row
                    batch_scores[champion][batch_idx] = max(base_score, 0)

                    # Collect debug data if this is the debug champion
                    if debug == champion:
                        counter_list = []
                        for i in range(1, 6):
                            opp_col = f'opp_champ{i}'
                            if opp_col in row and pd.notna(row[opp_col]):
                                if champion in counter_map and row[opp_col] in counter_map[champion]:
                                    counter_list.append(row[opp_col])

                        debug_row = {
                            'champion': row['champion'],
                            'recent_score': champion_scores['recent_score'],
                            'weekly_score': champion_scores['weekly_score'],
                            'meta_score': champion_scores['meta_score'],
                            'base_score': base_score_before_penalty,
                            'final_score': base_score,
                            'counter_penalty': counter_penalty if consider_team_comp else 0,
                            'final_score_actual': feature_dict[row['champion']][idx] if row['champion'] in feature_dict else base_score,
                            'counter_list_debug': counter_list
                        }
                        debug_data.append(debug_row)

            # Update feature_dict with batch results
            for champion in batch_scores:
                if champion not in feature_dict:
                    feature_dict[champion] = np.zeros(total_rows)
                feature_dict[champion][batch_start:batch_end] = batch_scores[champion]

            # Save progress after each batch
            temp_df = pd.DataFrame({
                **{col: feature_dict[col] for col in original_columns},              # Original columns first
                **{champion: feature_dict[champion] for champion in all_champions}  # Then champion columns
            })
            batch_save_file = os.path.join("util", "data", "feature_eng_stats.csv")
            temp_df.to_csv(batch_save_file, index=False)
            print(f"Saved batch progress to {batch_save_file}")

        if debug:
            print(f"{debug} is countered by: {counter_map.get(debug, [])}")

        # Print collected debug data, if any
        if debug:
            debug_df = pd.DataFrame(debug_data)
            print("\nDebug Data:")
            print(debug_df)

        # Create final DataFrame
        champion_features = pd.DataFrame(feature_dict)

        # Combine original data with the new champion features
        features = pd.concat([
            merged_player_stats[original_columns],  # Keep all original columns
            champion_features[[col for col in champion_features.columns if col not in original_columns]]  # Only new champion columns
        ], axis=1)

        # Move the champion column to be the first column
        if 'champion' in features.columns:
            columns = ['champion'] + [col for col in features.columns if col != 'champion']
            features = features[columns]

        print("Saved features in data frame.")
        return features

    except Exception as e:
        print(f"\nError occurred: {str(e)}")
        return None
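# A minimal, self-contained sketch (hypothetical numbers, not part of the pipeline)
# of how the two penalty stages above compose: the tier penalty scales the weighted
# base score first, then each opposing counter pick removes a further 10%.
def _example_penalty_composition():
    base_score = 0.644                          # weighted sum of the five sub-scores
    tier_penalties = {3: 0.9, 4: 0.85, 5: 0.8}
    champion_tiers = [4, 2]                     # hypothetical tiers across two roles
    best_tier = min(champion_tiers)             # -> 2, which carries no penalty
    if best_tier in tier_penalties:
        base_score *= tier_penalties[best_tier]
    counter_penalty = 2 * 0.1                   # hypothetical: countered by two opponents
    return base_score * (1 - counter_penalty)   # 0.644 * 0.8 = 0.5152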
'''
def get_weekly_meta():
    BASE_URL = "https://www.op.gg/statistics/champions?tier=challenger&period=week&mode=ranked"
    driver = setup_driver()
    try:
        driver.get(BASE_URL)
        table = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#content-container > div:nth-child(2) > table"))
        )

        # Extract table rows
        rows = table.find_elements(By.TAG_NAME, "tr")

        # Define the column order
        columns = ["rank", "champion", "games", "KDA", "WR", "pick", "ban", "cs", "gold"]

        data = []
        for row in rows[1:]:  # Skip the header row
            cells = row.find_elements(By.TAG_NAME, "td")
            row_data = [cell.text for cell in cells]
            if len(row_data) >= len(columns):
                # Remove ":1" from the KDA format
                row_data[3] = row_data[3].replace(":1", "")
                # Convert WR, pick, and ban percentages to decimals
                row_data[4] = convert_percentage_to_decimal(row_data[4])
                row_data[5] = convert_percentage_to_decimal(row_data[5])
                row_data[6] = convert_percentage_to_decimal(row_data[6])
                # Remove commas from the gold values
                row_data[8] = int(row_data[8].replace(",", ""))
                data.append(row_data[:len(columns)])

        # Create a DataFrame with the extracted data
        df = pd.DataFrame(data, columns=columns)
        print("Saved weekly meta to dataframe:\n", df)
        return df

    except Exception as e:
        print(f"Error: {e}")
        return None
    finally:
        driver.quit()


def get_meta_stats():
    """Scrape champion data for every role, with error handling and logging."""
    driver = None
    try:
        driver = setup_driver()
        all_roles_data = []
        for role in ROLES:
            role_url = BASE_URL.format(role=role)
            role_data = get_champion_table_data(driver, role_url, role)
            all_roles_data.extend(role_data)

        if not all_roles_data:
            print("No data was collected from any role")
            return pd.DataFrame()

        df = pd.DataFrame(all_roles_data)
        print("Saved meta stats to df:\n", df)
        return df

    except Exception as e:
        print(f"Error in get_meta_stats: {e}")
        return pd.DataFrame()
    finally:
        if driver:
            driver.quit()
'''


def create_app_user_training_df(url):
    try:
        #meta_stats = get_meta_stats()
        #weekly_meta_stats = get_weekly_meta()
        print("========= Inside get_user_training_df(player_opgg_url) ============= \n")
        print("input url: ", url, "\n")

        # Input validation
        if not url or not isinstance(url, str):
            raise ValueError("Invalid URL provided")

        # Extract region and username
        match = re.search(r"/summoners/(\w+)/([\w\-]+)$", url)
        if not match:
            raise ValueError(f"Could not parse region and username from URL: {url}")
        region = match.group(1)
        username = match.group(2)
        print(f"Extracted - Region: {region}, Username: {username}")
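        # For example, "https://www.op.gg/summoners/euw/Agurin-EUW" parses to
        # region="euw" and username="Agurin-EUW".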
        # Get recent stats
        print("Fetching recent matches...")
        recent_stats = get_matches_stats(region, username)

        # Validate recent_stats
        if recent_stats is None or recent_stats.empty:
            raise ValueError(f"recent_stats is empty. type(recent_stats): {type(recent_stats)}, recent_stats:\n{recent_stats}")

        print("Recent matches columns:", recent_stats.columns.tolist())

        # Check if player_id exists before trying to process it
        #if 'player_id' not in recent_stats.columns:
        #    raise KeyError("player_id column not found in recent_stats")

        # Normalize player_id ("name #tag" -> "name-tag")
        recent_stats['player_id'] = recent_stats['player_id'].str.replace(" #", "-", regex=False)
        print("Processed player_ids:", recent_stats['player_id'].head())

        # Get player stats
        print("Fetching player stats...")
        player_stats = get_player_stats(region, username)

        # NOTE: do not validate player_stats like a DataFrame; it is a tuple.
        #if player_stats is None or player_stats.empty:
        #    raise ValueError("No player stats found")
        #print("Player stats columns:", player_stats.columns.tolist())

        # Merge stats
        print("Merging stats...")
        merged_stats = merge_stats(recent_stats, player_stats)

        # Validate merged stats
        if merged_stats is None or merged_stats.empty:
            raise ValueError("Failed to merge stats")
        print("Merged stats columns:", merged_stats.columns.tolist())

        # Create features
        print("Creating champion features...")
        training_features = create_champion_features_and_return_df(
            merged_player_stats=merged_stats,
            debug=None,
            consider_team_comp=True,
            test_mode=False
        )

        # Final validation
        if training_features is None or training_features.empty:
            raise ValueError("Failed to create training features")

        print("Training features created successfully")
        return training_features

    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        print(f"Error in create_app_user_training_df:\n{error_trace}")
        raise Exception(f"Failed to create training dataframe: {str(e)}")

# ========================================= end of my functions =====================================================

#meta_stats = get_meta_stats()
#weekly_meta_stats = get_weekly_meta()
#url = "https://www.op.gg/summoners/euw/Agurin-EUW"
#return_value = create_app_user_training_df(url)
#print("type(Return_value):", type(return_value), "\n return value: \n", return_value)
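# Hypothetical end-to-end usage (kept commented out like the notes above, since it
# performs live scraping against op.gg; the output path is an assumption):
#features = create_app_user_training_df("https://www.op.gg/summoners/euw/Agurin-EUW")
#features.to_csv(os.path.join("util", "data", "app_user_features.csv"), index=False)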