# Spaces: ivwhy / lol_champion_pick_predictor (status: Sleeping)
# File size: 17,513 Bytes
# Revision shown on the page: abcb943

import os
import pandas as pd
import numpy as np
from helper import ChampionConverter, process_kda_perfect

def _to_float(value, default=0):
    """Return ``value`` converted to float, or ``default`` when it is missing (NaN/None)."""
    return float(value) if pd.notna(value) else default


def _recent_score(row, champion):
    """Score ``champion`` from the first matching ``most_champ_{1..3}`` slot of ``row`` (0 if none)."""
    for i in range(1, 4):
        if row.get(f'most_champ_{i}') == champion:
            wr = _to_float(row[f'WR_{i}'])
            kda = _to_float(row[f'KDA_{i}'])
            games = _to_float(row[f'W_{i}']) + _to_float(row[f'L_{i}'])
            total_games = _to_float(row['total_games'], 20)

            performance_quality = (wr * 0.7) + (min(kda, 10) / 10 * 0.3)
            games_factor = min(games / 5, 1.0)
            # A total of 0 would otherwise raise ZeroDivisionError and abort
            # the whole feature run; treat it as "no share information".
            games_ratio = games / total_games if total_games else 0.0

            # With enough games, push clearly bad/good performance further apart.
            if games >= 5:
                if performance_quality < 0.4:
                    performance_quality *= 0.8
                elif performance_quality > 0.7:
                    performance_quality *= 1.2

            return (
                performance_quality * (0.7 + (0.3 * games_factor))
            ) * (1 + games_ratio * 0.2)
    return 0


def _weekly_score(row, champion):
    """Score ``champion`` from the ``7d_champ_{1..3}`` slots of ``row`` (0 if none qualifies).

    A matching slot with no weekly games does not end the search; later slots
    are still examined.
    """
    for i in range(1, 4):
        if row.get(f'7d_champ_{i}') == champion:
            weekly_wins = _to_float(row[f'7d_W_{i}'])
            weekly_games = _to_float(row[f'7d_total_{i}'])
            weekly_wr = _to_float(row[f'7d_WR_{i}'])
            profile_wr = _to_float(row['win_rate'], 0.5)

            if weekly_games > 0:
                wr_trend = (weekly_wr - profile_wr) / profile_wr if profile_wr > 0 else 0
                weekly_intensity = min(weekly_games / 10, 1.0)
                win_ratio = weekly_wins / weekly_games

                weekly_performance = (
                    (weekly_wr * 0.4) +
                    (max(min(wr_trend, 1), -1) * 0.2) +
                    (weekly_intensity * 0.2) +
                    (win_ratio * 0.2)
                )

                if weekly_games >= 5:
                    if weekly_performance < 0.4:
                        weekly_performance *= 0.8
                    elif weekly_performance > 0.7:
                        weekly_performance *= 1.2

                return weekly_performance * (
                    0.7 + (0.3 * min(weekly_games / 5, 1.0))
                )
    return 0


def _season_score(row, champion):
    """Score ``champion`` from the first matching ``season_champ_{1..7}`` slot of ``row`` (0 if none)."""
    for i in range(1, 8):
        if row.get(f'season_champ_{i}') == champion:
            wr = _to_float(row[f'wr_ssn_{i}'])
            games = _to_float(row[f'games_ssn_{i}'])
            kda = _to_float(row[f'kda_ssn_{i}'])
            return (wr * 0.7 + (kda / 10) * 0.3) * (games / 100)
    return 0


def _mastery_score(row, champion):
    """Return ``m_lv_i / 7`` for the first matching ``mastery_champ_{1..16}`` slot of ``row`` (0 if none)."""
    for i in range(1, 17):
        if row.get(f'mastery_champ_{i}') == champion:
            return _to_float(row[f'm_lv_{i}']) / 7
    return 0


def _apply_team_comp(row, champion, base_score, counter_map):
    """Zero the score of already-picked champions and apply counter penalties.

    Returns ``(adjusted_score, counter_penalty)``. The score becomes 0 when
    ``champion`` appears in ``team_champ1..4`` or ``opp_champ1..5``; otherwise
    it is reduced by 10% for every opponent listed in ``counter_map[champion]``.
    """
    counter_penalty = 0

    for i in range(1, 5):
        team_col = f'team_champ{i}'
        if team_col in row and pd.notna(row[team_col]) and row[team_col] == champion:
            return 0, counter_penalty

    # Opponents only matter while there is still a non-zero score to adjust.
    if base_score == 0:
        return base_score, counter_penalty

    counters = counter_map.get(champion, ())
    for i in range(1, 6):
        opp_col = f'opp_champ{i}'
        if opp_col in row and pd.notna(row[opp_col]):
            opp_champ = row[opp_col]
            if opp_champ == champion:
                base_score = 0
                break
            if opp_champ in counters:
                counter_penalty += 0.1

    if counter_penalty > 0:
        base_score = base_score * (1 - counter_penalty)
    return base_score, counter_penalty


def create_champion_features(merged_player_stats=None, meta_stats=None, weekly_meta=None, debug=None, consider_team_comp=True, test_mode=False):
    """
    Create features for champion prediction using player data.

    For every input row, one score per champion known to ``ChampionConverter``
    is computed as a weighted sum of recent, weekly, meta, season and mastery
    sub-scores, then adjusted by tier penalties and (optionally) by team
    composition / counter information. Champion names are used as the headers
    of the added columns.

    Args:
        merged_player_stats: Player stats DataFrame. When None it is read from
            ``util/data/player_stats_merged.csv``. It is always passed through
            ``process_kda_perfect`` first.
        meta_stats: DataFrame read here through the columns ``champion``,
            ``tier`` and ``counter1..counter3``. When None it is read from
            ``util/data/meta_stats.csv``.
        weekly_meta: DataFrame read here through the columns ``champion``,
            ``rank``, ``games``, ``pick`` and ``ban``. When None it is read
            from ``util/data/weekly_meta_stats.csv``.
        debug: Optional champion name; per-row score details for that champion
            are collected and printed.
        consider_team_comp: When True, champions already present in the
            ``team_champ*`` / ``opp_champ*`` columns score 0 and counter
            penalties are applied.
        test_mode: When True only the first 100 rows are processed.

    Returns:
        DataFrame holding the original columns (``champion`` moved first)
        followed by one score column per champion, or None if any exception
        occurred (the error message is printed).

    Side effects:
        Writes ``util/data/feature_eng_stats.csv`` after every batch of 100
        rows and once more at the end; prints progress to stdout.
    """
    try:
        if merged_player_stats is None:
            print("Loading merged player stats...")
            input_file = os.path.join("util", "data", "player_stats_merged.csv")
            merged_player_stats = pd.read_csv(input_file, low_memory=False)

        # Processing kda value
        merged_player_stats = process_kda_perfect(merged_player_stats)

        if test_mode:
            print("Test mode: Using only first 100 rows")
            merged_player_stats = merged_player_stats.head(100)

        if meta_stats is None:
            print("Loading meta stats...")
            meta_file = os.path.join("util", "data", "meta_stats.csv")
            meta_stats = pd.read_csv(meta_file, low_memory=False)

        if weekly_meta is None:
            print("Loading weekly meta stats...")
            weekly_file = os.path.join("util", "data", "weekly_meta_stats.csv")
            weekly_meta = pd.read_csv(weekly_file, low_memory=False)

        # Initialize variables
        debug_data = []
        original_columns = merged_player_stats.columns.tolist()
        # Positional copies of the original columns, used for the per-batch snapshots.
        feature_dict = {
            col: merged_player_stats[col].values.copy()
            for col in merged_player_stats.columns
        }

        # Initialize the champion converter
        converter = ChampionConverter()
        all_champions = converter.champions

        # Score multipliers for champions whose best (lowest) tier is 3, 4 or 5.
        tier_penalties = {3: 0.9, 4: 0.85, 5: 0.8}

        # champion -> list of tiers, and champion -> list of unique counters
        tier_map = {}
        counter_map = {}
        for _, meta_row in meta_stats.iterrows():
            champ = meta_row['champion']
            tier = meta_row['tier']
            if pd.notna(tier):
                tier_map.setdefault(champ, []).append(tier)
            if pd.notna(meta_row['counter1']):
                counters = [meta_row['counter1'], meta_row['counter2'], meta_row['counter3']]
                counter_map.setdefault(champ, []).extend(
                    c for c in counters if pd.notna(c)
                )
        counter_map = {champ: list(set(counters)) for champ, counters in counter_map.items()}

        # Tier multipliers and meta scores depend only on the champion, so they
        # are computed once here instead of once per row and champion.
        tier_multiplier = {}
        meta_scores = {}
        meta_champions = weekly_meta['champion'].values
        for champion in all_champions:
            if champion in tier_map:
                highest_tier = min(tier_map[champion])
                if highest_tier in tier_penalties:
                    tier_multiplier[champion] = tier_penalties[highest_tier]
            if champion in meta_champions:
                weekly_row = weekly_meta[weekly_meta['champion'] == champion].iloc[0]
                meta_scores[champion] = (
                    1 / weekly_row['rank'] * 0.5 +
                    weekly_row['games'] / 100 * 0.3 +
                    weekly_row['pick'] * 0.1 -
                    weekly_row['ban'] * 0.1
                )

        # Move 'champion' column to the first position
        cols = ['champion'] + [col for col in merged_player_stats if col != 'champion']
        merged_player_stats = merged_player_stats[cols]

        # Define importance weights
        weights = {
            'recent': 0.3,    # Last 20 games
            'weekly': 0.4,    # Last 7 days
            'meta': 0.2,      # Only from weekly_stats
            'season': 0.06,   # Current season
            'mastery': 0.04   # All-time mastery
        }

        # The output directory must exist before the first per-batch snapshot.
        output_file = os.path.join("util", "data", "feature_eng_stats.csv")
        os.makedirs(os.path.dirname(output_file), exist_ok=True)

        # Process rows in batches
        batch_size = 100
        total_rows = len(merged_player_stats)

        print(f"Total rows: {total_rows}")

        for batch_start in range(0, total_rows, batch_size):
            batch_end = min(batch_start + batch_size, total_rows)
            batch_rows = merged_player_stats.iloc[batch_start:batch_end]
            print(f"\nProcessing rows {batch_start} to {batch_end} ({batch_start/total_rows*100:.2f}% complete)")

            # Initialize batch scores dictionary
            batch_scores = {champion: np.zeros(len(batch_rows)) for champion in all_champions}

            # batch_idx is the position inside the batch; the index label is not used.
            for batch_idx, (_, row) in enumerate(batch_rows.iterrows()):
                row_debug_entries = []

                for champion in all_champions:
                    recent_score = _recent_score(row, champion)
                    weekly_score = _weekly_score(row, champion)
                    meta_score = meta_scores.get(champion, 0)
                    season_score = _season_score(row, champion)
                    mastery_score = _mastery_score(row, champion)

                    base_score = (
                        recent_score * weights['recent'] +
                        weekly_score * weights['weekly'] +
                        meta_score * weights['meta'] +
                        season_score * weights['season'] +
                        mastery_score * weights['mastery']
                    )

                    # Keep the pre-penalty score for debugging
                    base_score_before_penalty = base_score

                    # Apply tier penalties
                    if champion in tier_multiplier:
                        base_score *= tier_multiplier[champion]

                    # Process team composition and counter penalties
                    counter_penalty = 0
                    if consider_team_comp:
                        base_score, counter_penalty = _apply_team_comp(
                            row, champion, base_score, counter_map
                        )

                    # Store the final (non-negative) score for this champion and row
                    batch_scores[champion][batch_idx] = max(base_score, 0)

                    # Collect debug data if this is the debug champion
                    if debug == champion:
                        counter_list = []
                        for i in range(1, 6):
                            opp_col = f'opp_champ{i}'
                            if opp_col in row and pd.notna(row[opp_col]):
                                if champion in counter_map and row[opp_col] in counter_map[champion]:
                                    counter_list.append(row[opp_col])

                        row_debug_entries.append({
                            'champion': row['champion'],
                            'recent_score': recent_score,
                            'weekly_score': weekly_score,
                            'meta_score': meta_score,
                            'base_score': base_score_before_penalty,
                            'final_score': base_score,
                            'counter_penalty': counter_penalty if consider_team_comp else 0,
                            'final_score_actual': None,  # filled in once the whole row is scored
                            'counter_list_debug': counter_list
                        })

                # The score of the champion actually picked in this row is only
                # known after every champion has been scored, so fill it in now.
                for entry in row_debug_entries:
                    actual_champion = row['champion']
                    if actual_champion in batch_scores:
                        entry['final_score_actual'] = batch_scores[actual_champion][batch_idx]
                    else:
                        entry['final_score_actual'] = entry['final_score']
                    debug_data.append(entry)

            # Update feature_dict with batch results
            for champion in batch_scores:
                if champion not in feature_dict:
                    feature_dict[champion] = np.zeros(total_rows)
                feature_dict[champion][batch_start:batch_end] = batch_scores[champion]

            # Save a progress snapshot after each batch (same file every time)
            temp_df = pd.DataFrame({
                **{col: feature_dict[col] for col in original_columns},  # Original columns first
                **{champion: feature_dict[champion] for champion in all_champions}  # Then champion columns
            })

            batch_save_file = output_file
            temp_df.to_csv(batch_save_file, index=False)
            print(f"Saved batch progress to {batch_save_file}")

            if debug:
                # .get: the debug champion may have no counter data at all.
                print(f"{debug} is countered by: {counter_map.get(debug, [])}")

        # Process debug data if any
        if debug:
            debug_df = pd.DataFrame(debug_data)
            print("\nDebug Data:")
            print(debug_df)

        # Build the champion columns on the same index as the input frame so the
        # column-wise concat below pairs rows by position, not by a mismatched label.
        original_column_set = set(original_columns)
        champion_features = pd.DataFrame(
            {col: values for col, values in feature_dict.items() if col not in original_column_set},
            index=merged_player_stats.index,
        )

        # Create the final DataFrame by combining original data with new features
        features = pd.concat([
            merged_player_stats[original_columns],  # Keep all original columns
            champion_features  # Only new champion columns
        ], axis=1)

        # Move the champion column to be the first column
        if 'champion' in features.columns:
            columns = ['champion'] + [col for col in features.columns if col != 'champion']
            features = features[columns]

        # Save the final result to CSV
        features.to_csv(output_file, index=False)

        # Print confirmation message
        print(f"Saved features to {output_file}")

        return features

    except Exception as e:
        # Best-effort boundary: callers check for None instead of catching.
        print(f"\nError occurred: {str(e)}")
        return None

if __name__ == "__main__":
    # Script entry point: run the feature builder in test mode on a dated
    # snapshot of the merged player stats, with debug output for one champion.
    try:
        stats_path = os.path.join("util", "data", "player_stats_merged_2025-01-05.csv")
        merged_stats = pd.read_csv(stats_path)

        run_options = {
            'merged_player_stats': merged_stats,
            'debug': 'Viktor',
            'consider_team_comp': True,
            'test_mode': True,
        }
        features = create_champion_features(**run_options)

        # The builder signals failure by returning None rather than raising.
        if features is None:
            print("\nProcessing failed or was interrupted.")
        else:
            print("\nProcessing completed successfully!")
            print(f"Generated features for {len(features)} rows")

    except Exception as e:
        print(f"\nFatal error: {str(e)}")