|
from Recent_match_scrapper import get_matches_stats |
|
import os |
|
import pandas as pd |
|
import numpy as np |
|
from Meta_scrapper import * |
|
from helper import merge_stats, process_kda_perfect, ChampionConverter |
|
from Player_scrapper import get_player_stats |
|
from Weekly_meta_scrapper import * |
|
import pandas as pd |
|
import re |
|
|
|
|
|
|
|
|
|
def create_champion_features_and_return_df(merged_player_stats=None, meta_stats=None, weekly_meta=None, debug=None, consider_team_comp=True, test_mode=False):
    """
    Create one score column per champion for champion prediction.

    For every player row a weighted score is computed per champion from five
    components (recent games, 7-day games, current meta, season stats, mastery),
    then adjusted by meta-tier penalties and (optionally) team-composition /
    counter-pick penalties. Champion names become the new column headers.
    Scores are accumulated in plain numpy arrays and assembled with
    pd.concat / a single DataFrame constructor to avoid DataFrame fragmentation.

    Parameters
    ----------
    merged_player_stats : pd.DataFrame, optional
        Merged per-player stats; loaded from util/data/player_stats_merged.csv
        when None.
    meta_stats : pd.DataFrame, optional
        Per-role meta table with 'champion', 'tier', 'counter1..3' columns;
        loaded from util/data/meta_stats.csv when None.
    weekly_meta : pd.DataFrame, optional
        Weekly meta table with 'champion', 'rank', 'games', 'pick', 'ban';
        loaded from util/data/weekly_meta_stats.csv when None.
    debug : str, optional
        A champion name; when set, per-row score breakdowns for that champion
        are collected and printed.
    consider_team_comp : bool
        When True, zero the score for champions already picked by either team
        and apply counter-pick penalties.
    test_mode : bool
        When True, only the first 100 rows are processed.

    Returns
    -------
    pd.DataFrame or None
        Original columns plus one score column per champion ('champion' first),
        or None if any exception occurred (error is printed, not re-raised).
    """
    try:
        if merged_player_stats is None:
            print("Loading merged player stats...")
            input_file = os.path.join("util", "data", "player_stats_merged.csv")
            merged_player_stats = pd.read_csv(input_file, low_memory=False)

        # Normalize KDA-related columns (helper from helper.py; exact
        # transformation defined there).
        merged_player_stats = process_kda_perfect(merged_player_stats)

        if test_mode:
            print("Test mode: Using only first 100 rows")
            merged_player_stats = merged_player_stats.head(100)

        if meta_stats is None:
            print("Loading meta stats...")
            meta_file = os.path.join("util", "data", "meta_stats.csv")
            meta_stats = pd.read_csv(meta_file, low_memory=False)

        if weekly_meta is None:
            print("Loading weekly meta stats...")
            weekly_file = os.path.join("util", "data", "weekly_meta_stats.csv")
            weekly_meta = pd.read_csv(weekly_file, low_memory=False)

        debug_data = []
        original_columns = merged_player_stats.columns.tolist()
        # feature_dict holds one numpy array per output column: first the
        # original columns (copied), later one array per champion score.
        feature_dict = {}

        for col in merged_player_stats.columns:
            feature_dict[col] = merged_player_stats[col].values.copy()

        converter = ChampionConverter()
        all_champions = converter.champions

        # Meta-tier multipliers: lower tiers (3-5) get their score reduced.
        tier_penalties = {3: 0.9, 4: 0.85, 5: 0.8}

        # champion -> list of tiers (one per role row in meta_stats).
        tier_map = {}
        for _, row in meta_stats.iterrows():
            champ = row['champion']
            tier = row['tier']
            if pd.notna(tier):
                if champ in tier_map:
                    tier_map[champ].append(tier)
                else:
                    tier_map[champ] = [tier]

        # champion -> champions that counter it, collected across role rows.
        counter_map = {}
        for _, row in meta_stats.iterrows():
            if pd.notna(row['counter1']):
                champ = row['champion']
                counters = [row['counter1'], row['counter2'], row['counter3']]
                if champ in counter_map:
                    counter_map[champ].extend([c for c in counters if pd.notna(c)])
                else:
                    counter_map[champ] = [c for c in counters if pd.notna(c)]

        # De-duplicate counter lists (order is not preserved; only membership
        # is used below).
        for champ, counters in counter_map.items():
            counter_map[champ] = list(set(counters))

        # Move 'champion' to the front of the working frame.
        cols = ['champion'] + [col for col in merged_player_stats if col != 'champion']
        merged_player_stats = merged_player_stats[cols]

        # Relative weight of each score component (sums to 1.0).
        weights = {
            'recent': 0.3,
            'weekly': 0.4,
            'meta': 0.2,
            'season': 0.06,
            'mastery': 0.04
        }

        # Rows are processed in batches so partial progress can be
        # checkpointed to CSV after each batch.
        batch_size = 100
        total_rows = len(merged_player_stats)

        print(f"Total rows: {total_rows}")

        for batch_start in range(0, total_rows, batch_size):
            batch_end = min(batch_start + batch_size, total_rows)
            batch_rows = merged_player_stats.iloc[batch_start:batch_end]
            print(f"\nProcessing rows {batch_start} to {batch_end} ({batch_start/total_rows*100:.2f}% complete)")

            # Per-batch score buffer: champion -> array of len(batch_rows).
            batch_scores = {champion: np.zeros(len(batch_rows)) for champion in all_champions}

            for batch_idx, (idx, row) in enumerate(batch_rows.iterrows()):

                for champion in all_champions:

                    champion_scores = {
                        'recent_score': 0,
                        'weekly_score': 0,
                        'meta_score': 0,
                        'season_score': 0,
                        'mastery_score': 0
                    }

                    base_score_before_penalty = 0
                    counter_penalty = 0
                    counter_debug = []

                    # --- Recent-games component: slots most_champ_1..3 ---
                    for i in range(1, 4):
                        if row.get(f'most_champ_{i}') == champion:
                            wr = float(row[f'WR_{i}']) if pd.notna(row[f'WR_{i}']) else 0
                            kda = float(row[f'KDA_{i}']) if pd.notna(row[f'KDA_{i}']) else 0
                            wins = float(row[f'W_{i}']) if pd.notna(row[f'W_{i}']) else 0
                            losses = float(row[f'L_{i}']) if pd.notna(row[f'L_{i}']) else 0
                            games = wins + losses
                            # Missing total_games defaults to 20 (assumed
                            # typical sample size -- TODO confirm).
                            total_games = float(row['total_games']) if pd.notna(row['total_games']) else 20

                            # 70% win rate, 30% KDA (KDA capped at 10).
                            performance_quality = (
                                (wr * 0.7) +
                                (min(kda, 10) / 10 * 0.3)
                            )

                            games_factor = min(games / 5, 1.0)
                            games_ratio = games / total_games

                            # With a meaningful sample (>=5 games), amplify
                            # strong performance and dampen weak performance.
                            if games >= 5:
                                if performance_quality < 0.4:
                                    performance_quality *= 0.8
                                elif performance_quality > 0.7:
                                    performance_quality *= 1.2

                            champion_scores['recent_score'] = (
                                performance_quality * (0.7 + (0.3 * games_factor))
                            ) * (1 + games_ratio * 0.2)
                            break

                    # --- Weekly (7-day) component: slots 7d_champ_1..3 ---
                    for i in range(1, 4):
                        if row.get(f'7d_champ_{i}') == champion:
                            weekly_wins = float(row[f'7d_W_{i}']) if pd.notna(row[f'7d_W_{i}']) else 0
                            weekly_losses = float(row[f'7d_L_{i}']) if pd.notna(row[f'7d_L_{i}']) else 0
                            weekly_games = float(row[f'7d_total_{i}']) if pd.notna(row[f'7d_total_{i}']) else 0
                            weekly_wr = float(row[f'7d_WR_{i}']) if pd.notna(row[f'7d_WR_{i}']) else 0
                            profile_wr = float(row['win_rate']) if pd.notna(row['win_rate']) else 0.5

                            if weekly_games > 0:
                                # Trend of this week's WR vs overall profile
                                # WR, clamped to [-1, 1] below.
                                wr_trend = (weekly_wr - profile_wr) / profile_wr if profile_wr > 0 else 0
                                weekly_intensity = min(weekly_games / 10, 1.0)
                                win_ratio = weekly_wins / weekly_games if weekly_games > 0 else 0

                                weekly_performance = (
                                    (weekly_wr * 0.4) +
                                    (max(min(wr_trend, 1), -1) * 0.2) +
                                    (weekly_intensity * 0.2) +
                                    (win_ratio * 0.2)
                                )

                                # Same amplification/dampening rule as the
                                # recent component.
                                if weekly_games >= 5:
                                    if weekly_performance < 0.4:
                                        weekly_performance *= 0.8
                                    elif weekly_performance > 0.7:
                                        weekly_performance *= 1.2

                                champion_scores['weekly_score'] = weekly_performance * (
                                    0.7 + (0.3 * min(weekly_games / 5, 1.0))
                                )
                            break

                    # --- Meta component: weekly_meta popularity/rank ---
                    # NOTE(review): this does a full-frame scan per
                    # (row, champion) pair; a precomputed champion->weight
                    # map would avoid the repeated filtering.
                    if champion in weekly_meta['champion'].values:
                        weekly_row = weekly_meta[weekly_meta['champion'] == champion].iloc[0]
                        rank = weekly_row['rank']
                        games = weekly_row['games']
                        pick_rate = weekly_row['pick']
                        ban_rate = weekly_row['ban']

                        # Higher rank (lower number) and more games raise the
                        # weight; high ban rate lowers it.
                        weight = (
                            1 / rank * 0.5 +
                            games / 100 * 0.3 +
                            pick_rate * 0.1 -
                            ban_rate * 0.1
                        )

                        champion_scores['meta_score'] = weight

                    # --- Season component: slots season_champ_1..7 ---
                    for i in range(1, 8):
                        if row.get(f'season_champ_{i}') == champion:
                            wr = float(row[f'wr_ssn_{i}']) if pd.notna(row[f'wr_ssn_{i}']) else 0
                            games = float(row[f'games_ssn_{i}']) if pd.notna(row[f'games_ssn_{i}']) else 0
                            kda = float(row[f'kda_ssn_{i}']) if pd.notna(row[f'kda_ssn_{i}']) else 0

                            champion_scores['season_score'] = (
                                wr * 0.7 +
                                (kda / 10) * 0.3
                            ) * (games / 100)
                            break

                    # --- Mastery component: slots mastery_champ_1..16 ---
                    for i in range(1, 17):
                        if row.get(f'mastery_champ_{i}') == champion:
                            # Mastery level normalized by the classic level
                            # cap of 7.
                            mastery = float(row[f'm_lv_{i}']) if pd.notna(row[f'm_lv_{i}']) else 0
                            champion_scores['mastery_score'] = mastery / 7
                            break

                    # Weighted sum of the five components.
                    base_score = (
                        champion_scores['recent_score'] * weights['recent'] +
                        champion_scores['weekly_score'] * weights['weekly'] +
                        champion_scores['meta_score'] * weights['meta'] +
                        champion_scores['season_score'] * weights['season'] +
                        champion_scores['mastery_score'] * weights['mastery']
                    )

                    base_score_before_penalty = base_score

                    # Apply the penalty for the champion's best (lowest) tier.
                    if champion in tier_map:
                        highest_tier = min(tier_map[champion])
                        if highest_tier in tier_penalties:
                            base_score *= tier_penalties[highest_tier]

                    if consider_team_comp:

                        # Champion already picked by a teammate -> unavailable.
                        for i in range(1, 5):
                            team_col = f'team_champ{i}'
                            if team_col in row and pd.notna(row[team_col]):
                                if row[team_col] == champion:
                                    base_score = 0
                                    break

                        if base_score != 0:
                            counter_penalty = 0
                            counter_debug = []

                            for i in range(1, 6):
                                opp_col = f'opp_champ{i}'
                                if opp_col in row and pd.notna(row[opp_col]):
                                    opp_champ = row[opp_col]
                                    # Picked by the opponent -> unavailable.
                                    if opp_champ == champion:
                                        base_score = 0
                                        break
                                    # 10% penalty per opposing counter pick.
                                    if champion in counter_map and opp_champ in counter_map[champion]:
                                        counter_penalty += 0.1
                                        counter_debug.append(opp_champ)

                            if counter_penalty > 0:
                                base_score = base_score * (1 - counter_penalty)

                    # Scores are clamped at 0 (penalties can push them
                    # negative only via counter_penalty >= 1.0).
                    batch_scores[champion][batch_idx] = max(base_score, 0)

                    if debug == champion:
                        counter_list = []
                        for i in range(1, 6):
                            opp_col = f'opp_champ{i}'
                            if opp_col in row and pd.notna(row[opp_col]):
                                if champion in counter_map and row[opp_col] in counter_map[champion]:
                                    counter_list.append(row[opp_col])

                        # NOTE(review): 'final_score_actual' indexes
                        # feature_dict by the DataFrame label `idx`; this is
                        # only correct while labels coincide with positions
                        # (e.g. a default RangeIndex) -- verify upstream.
                        debug_row = {
                            'champion': row['champion'],
                            'recent_score': champion_scores['recent_score'],
                            'weekly_score': champion_scores['weekly_score'],
                            'meta_score': champion_scores['meta_score'],
                            'base_score': base_score_before_penalty,
                            'final_score': base_score,
                            'counter_penalty': counter_penalty if consider_team_comp else 0,
                            'final_score_actual': feature_dict[row['champion']][idx] if row['champion'] in feature_dict else base_score,
                            'counter_list_debug': counter_list
                        }
                        debug_data.append(debug_row)

            # Flush this batch's scores into the full-length arrays.
            for champion in batch_scores:
                if champion not in feature_dict:
                    feature_dict[champion] = np.zeros(total_rows)
                feature_dict[champion][batch_start:batch_end] = batch_scores[champion]

            # Checkpoint: rebuild and save the full output CSV after every
            # batch so progress survives a crash.
            temp_df = pd.DataFrame({
                **{col: feature_dict[col] for col in original_columns},
                **{champion: feature_dict[champion] for champion in all_champions}
            })

            batch_save_file = os.path.join("util", "data", f"feature_eng_stats.csv")
            temp_df.to_csv(batch_save_file, index=False)
            print(f"Saved batch progress to {batch_save_file}")

            # NOTE(review): raises KeyError if `debug` names a champion with
            # no counters in meta_stats (caught by the outer except, which
            # then aborts the whole run) -- consider counter_map.get(debug).
            if debug:
                print(f"{debug} is countered by: {counter_map[debug]}")

        if debug:
            debug_df = pd.DataFrame(debug_data)
            print("\nDebug Data:")
            print(debug_df)

        champion_features = pd.DataFrame(feature_dict)

        # Original columns come from the (reordered) input frame; champion
        # columns come from the computed features.
        features = pd.concat([
            merged_player_stats[original_columns],
            champion_features[[col for col in champion_features.columns if col not in original_columns]]
        ], axis=1)

        if 'champion' in features.columns:
            columns = ['champion'] + [col for col in features.columns if col != 'champion']
            features = features[columns]

        print(f"Saved features in data frame.")

        return features

    # NOTE(review): broad catch swallows all errors (including the KeyError
    # above) and returns None; callers must handle a None result.
    except Exception as e:
        print(f"\nError occurred: {str(e)}")
        return None
|
|
|
# NOTE(review): removed a module-level triple-quoted string that held dead,
# commented-out copies of get_weekly_meta() and get_meta_stats(). The string
# was evaluated and discarded on every import; the live implementations are
# presumably the ones pulled in via the wildcard imports from
# Weekly_meta_scrapper and Meta_scrapper at the top of the file (recover the
# old text from version control if ever needed).
|
|
|
def create_app_user_training_df(url):
    """
    Build the training-feature DataFrame for one player from their op.gg URL.

    Pipeline: parse region/username from the URL, scrape recent matches and
    player stats, merge them, then run create_champion_features_and_return_df
    on the merged frame.

    Parameters
    ----------
    url : str
        Profile URL ending in "/summoners/<region>/<username>".

    Returns
    -------
    pd.DataFrame
        Training features for the player.

    Raises
    ------
    Exception
        Any underlying failure (bad URL, empty scrape, failed merge or
        feature creation) is wrapped with a descriptive message; the
        original exception is preserved via `raise ... from e`.
    """
    try:
        print("========= Inside get_user_training_df(player_opgg_url) ============= \n")
        print("input url: ", url, "\n")

        if not url or not isinstance(url, str):
            raise ValueError("Invalid URL provided")

        # Expect ".../summoners/<region>/<username>" at the end of the URL.
        match = re.search(r"/summoners/(\w+)/([\w\-]+)$", url)
        if not match:
            raise ValueError(f"Could not parse region and username from URL: {url}")

        region = match.group(1)
        username = match.group(2)
        print(f"Extracted - Region: {region}, Username: {username}")

        print("Fetching recent matches...")
        recent_stats = get_matches_stats(region, username)

        if recent_stats is None or recent_stats.empty:
            # Fix: the original passed several positional args to ValueError,
            # which rendered as a tuple; build one formatted message instead.
            raise ValueError(
                f"recent_stats is empty. type(recent_stats): {type(recent_stats)} "
                f"recent_stats:\n{recent_stats}"
            )

        print("Recent matches columns:", recent_stats.columns.tolist())

        # Normalize "Name #TAG" player ids to "Name-TAG" so they line up
        # with the ids used by the player-stats scraper when merging.
        recent_stats['player_id'] = recent_stats['player_id'].str.replace(" #", "-", regex=False)
        print("Processed player_ids:", recent_stats['player_id'].head())

        print("Fetching player stats...")
        player_stats = get_player_stats(region, username)

        print("Merging stats...")
        merged_stats = merge_stats(recent_stats, player_stats)

        if merged_stats is None or merged_stats.empty:
            raise ValueError("Failed to merge stats")

        print("Merged stats columns:", merged_stats.columns.tolist())

        print("Creating champion features...")
        training_features = create_champion_features_and_return_df(
            merged_player_stats=merged_stats,
            debug=None,
            consider_team_comp=True,
            test_mode=False
        )

        # create_champion_features_and_return_df returns None on internal
        # errors, so both cases must be checked here.
        if training_features is None or training_features.empty:
            raise ValueError("Failed to create training features")

        print("Training features created successfully")
        return training_features

    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        print(f"Error in create_app_user_training_df:\n{error_trace}")
        # Fix: chain the original exception so callers keep the root cause
        # instead of only the wrapper message.
        raise Exception(f"Failed to create training dataframe: {str(e)}") from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|