Spaces:
Sleeping
Sleeping
import time, os | |
from selenium import webdriver | |
from selenium.webdriver.chrome.service import Service | |
from selenium.webdriver.chrome.options import Options | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
import pandas as pd | |
from urllib.parse import unquote | |
from webdriver_manager.chrome import ChromeDriverManager | |
from webdriver_manager.core.os_manager import ChromeType | |
from helper import convert_to_minutes, convert_percentage_to_decimal, convert_tier_to_number, convert_result_to_binary, format_summoner_name, convert_to_displayname | |
def setup_driver():
    """Create the headless Chrome WebDriver used for scraping.

    Chrome prefs set the notification and image content settings to 2
    (presumably Chrome's "block" value), the 'enable-logging' switch is
    excluded, and a fixed desktop user-agent string is supplied.

    The chromedriver binary is resolved through webdriver_manager: the
    default manager when the 'HF_SPACE' environment variable is present,
    the Chromium-typed manager otherwise.

    Returns:
        A ``webdriver.Chrome`` instance built from those options.
    """
    chrome_options = Options()
    chrome_options.add_experimental_option('prefs', {
        'profile.default_content_setting_values': {'notifications': 2},
        'profile.managed_default_content_settings': {'images': 2},
    })
    chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])

    # Command-line switches, applied in the same order as before.
    launch_flags = (
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--window-size=1920,1080',
        'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124',
    )
    for flag in launch_flags:
        chrome_options.add_argument(flag)

    # Pick the driver manager for the current environment.
    if 'HF_SPACE' in os.environ:
        print("Running on Hugging Face Space.")
        manager = ChromeDriverManager()
    else:
        print("Running chrome webdriver.")
        manager = ChromeDriverManager(chrome_type=ChromeType.CHROMIUM)

    driver_binary = manager.install()
    return webdriver.Chrome(
        service=Service(executable_path=driver_binary),
        options=chrome_options,
    )
def get_tooltip_date(driver, element):
    """Return the text of the tooltip that appears when hovering ``element``.

    The element is scrolled into view, any existing
    ``span.react-tooltip-lite`` nodes are removed, and a synthetic
    ``mouseover`` event is dispatched on the element. After a short pause the
    text of the first tooltip whose ``offsetParent`` is not null is read back.

    Args:
        driver: Object exposing ``execute_script(script, *args)``.
        element: The element handed to the script as ``arguments[0]``.

    Returns:
        The tooltip text, or ``None`` when no such tooltip is found or when
        any step fails (best-effort lookup).
    """
    try:
        driver.execute_script("""
            arguments[0].scrollIntoView({block: 'center'});
            document.querySelectorAll('span.react-tooltip-lite').forEach(e => e.remove());
            arguments[0].dispatchEvent(new MouseEvent('mouseover', {
                view: window, bubbles: true, cancelable: true
            }));
        """, element)
        # Give the page a moment to render the tooltip before querying it.
        time.sleep(0.3)
        return driver.execute_script("""
            return Array.from(document.querySelectorAll('span.react-tooltip-lite'))
                .find(t => t.offsetParent !== null)?.textContent || null;
        """)
    except Exception:
        # Best-effort: a failed lookup yields None, but KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by the bare except.
        return None
def extract_match_data(match):
    """Read the raw text fields of one match element.

    Each entry of the selector table is looked up under ``match`` with
    ``find_element``. The champion image contributes its ``alt`` attribute,
    the laning field has its newlines removed, and every other field
    contributes the element text as-is.

    Lookups are handled independently: a field that cannot be read is
    reported and skipped, and the remaining fields are still collected.
    Previously the first failure discarded every later field.

    Args:
        match: Element exposing ``find_element(by, selector)``.

    Returns:
        dict mapping field name to extracted string, containing only the
        fields that could be read.
    """
    selectors = {
        'time_stamp': "div.time-stamp > div",
        'game_type': "div.game-type",
        'result': "div.result",
        'length': "div.length",
        'kda': "div.kda",
        'kda_ratio': "div.kda-ratio",
        'cs': "div.cs",
        'avg_tier': "div.avg-tier",
        'laning': "div.laning",
        'kill_participation': "div.p-kill",
        'champion_img': "div.info a.champion img",
        'champion_level': "div.info a.champion span.champion-level",
    }
    data = {}
    for key, selector in selectors.items():
        try:
            element = match.find_element(By.CSS_SELECTOR, selector)
            if key == 'champion_img':
                data[key] = element.get_attribute('alt')
            elif key == 'laning':
                # Remove newlines from laning data
                data[key] = element.text.replace('\n', '')
            else:
                data[key] = element.text
        except Exception as e:
            # Skip only this field; the rest of the row is still useful.
            print(f"Error extracting match data for '{key}': {e}")
    return data
def get_players_info(match):
    """Collect champion, region and name for the players listed in a match.

    At most the first ten ``div.css-pp7uqb.e1xevas21`` elements are read.
    For each one the champion comes from the icon image's ``alt`` attribute,
    and the region and name come from the last two path segments of the
    profile link; the name is URL-decoded.

    Returns:
        A list of ``{"champion", "region", "name"}`` dicts, or an empty list
        if anything goes wrong while reading the players.
    """
    try:
        roster = []
        player_nodes = match.find_elements(By.CSS_SELECTOR, "div.css-pp7uqb.e1xevas21")
        for node in player_nodes[:10]:
            icon = node.find_element(By.CSS_SELECTOR, "div.icon img")
            champion_name = icon.get_attribute("alt")

            link = node.find_element(By.CSS_SELECTOR, "div.name a")
            profile_url = link.get_attribute("href")

            # The last two URL segments are taken as <region>/<encoded name>.
            region, encoded_name = profile_url.split('/')[-2:]
            roster.append({
                "champion": champion_name,
                "region": region,
                "name": unquote(encoded_name),
            })
        return roster
    except Exception as e:
        print(f"Error getting players info: {e}")
        return []
def convert_laning_ratio(laning_str):
    """Turn a laning string such as 'Laning 70:30' into a single ratio.

    The text after the last 'Laning' marker (or the whole string when the
    marker is absent) must have the form '<int>:<int>'. The result is the
    first number divided by the second, rounded to two decimals.

    Returns:
        The rounded ratio, or 0.0 when the second number is zero or the
        string cannot be parsed (parse failures are also printed).
    """
    try:
        # Keep only what follows the last 'Laning' marker, e.g. '70:30'.
        _, _, tail = laning_str.rpartition('Laning')
        left, right = (int(piece) for piece in tail.strip().split(':'))
        # A zero denominator yields 0.0 instead of dividing.
        return round(left / right, 2) if right else 0.0
    except Exception as e:
        print(f"Laning conversion error for '{laning_str}': {e}")
        return 0.0
def extract_cs_number(cs_str):
    """Extract the CS count from a string (e.g., 'CS 123 (7.9)' -> 123).

    Only the text before the first '(' is considered, and every digit found
    there is concatenated (so '1,234' yields 1234).

    Args:
        cs_str: Raw CS text; non-string input is tolerated.

    Returns:
        The CS count as an int, or 0 when there are no digits or the input
        cannot be parsed.
    """
    try:
        digits = ''.join(filter(str.isdigit, cs_str.split('(')[0]))
        return int(digits) if digits else 0
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: input is not a str (None, int, bytes...).
        # ValueError: a character passes str.isdigit but int() rejects it.
        return 0
def extract_cs_per_min(cs_str):
    """Extract CS per minute from a string (e.g., 'CS 123 (7.9)' -> 7.9).

    The value is the text between the first '(' and the next ')'.

    Args:
        cs_str: Raw CS text; non-string input is tolerated.

    Returns:
        The parenthesised value as a float, or 0.0 when there are no
        parentheses, the content is not numeric, or the input is not a
        string.
    """
    try:
        inside_parens = cs_str.split('(')[1].split(')')[0]
        return float(inside_parens)
    except (AttributeError, TypeError, IndexError, ValueError):
        # IndexError: no '(' present. ValueError: non-numeric content.
        # AttributeError/TypeError: input is not a str.
        return 0.0
def process_match_data(match_data, username, players):
    """Build one flat result row for ``username`` from scraped match data.

    The player is located in ``players`` by comparing display names
    case-insensitively with spaces removed. Positions 0-4 are treated as the
    blue team and the rest as the red team; the remaining members of the
    player's side become teammates and the other side becomes opponents.

    Args:
        match_data: dict of raw strings (keys such as 'kda', 'cs', 'laning',
            'length', 'result', 'match_date', ...); missing keys fall back
            to defaults.
        username: Name of the tracked player, in any format accepted by
            ``convert_to_displayname``.
        players: List of dicts with 'name' and 'champion' keys.

    Returns:
        The row dict, or ``None`` when the player is not in ``players`` or
        any error occurs (both cases are printed).
    """
    def _squash(name):
        # Comparison key: lowercase with spaces removed.
        return name.lower().replace(' ', '')

    try:
        display_name = convert_to_displayname(username)

        # Locate the tracked player; -1 means "not present".
        player_index = -1
        for position, candidate in enumerate(players):
            if _squash(convert_to_displayname(candidate['name'])) == _squash(display_name):
                player_index = position
                break

        if player_index == -1:
            print(f"\nWarning: Player {display_name} not found in players list")
            print("Available players:", [convert_to_displayname(p['name']) for p in players])
            return None

        # Split the roster into the player's own side and the opposing side.
        blue_side, red_side = players[:5], players[5:]
        if player_index < 5:
            team = "blue"
            own_side, opponents = blue_side, red_side
            own_position = player_index
        else:
            team = "red"
            own_side, opponents = red_side, blue_side
            own_position = player_index - 5
        teammates = [member for pos, member in enumerate(own_side) if pos != own_position]

        # 'K/D/A' -> three strings, padding missing parts with "0".
        kda_parts = match_data.get('kda', '0/0/0').strip().split('/')
        kills, deaths, assists = kda_parts[:3] + ["0"] * (3 - len(kda_parts))

        kda_ratio = match_data.get("kda_ratio", "0").strip().replace(":1 KDA", "")
        kill_participation = convert_percentage_to_decimal(match_data.get("kill_participation", "0%"))
        laning_ratio = convert_laning_ratio(match_data.get("laning", "0:0"))

        raw_cs = match_data.get("cs", "0")
        cs = extract_cs_number(raw_cs)
        cpm = extract_cs_per_min(raw_cs)

        match_length_mins = convert_to_minutes(match_data.get("length", "0m 0s"))
        avg_tier_num = convert_tier_to_number(match_data.get("avg_tier", ""))
        result_num = convert_result_to_binary(match_data.get("result", ""))

        match_row = {
            "player_id": display_name,
            "date": match_data.get("match_date", ""),
            "champion": match_data.get("champion_img", ""),
            "level": match_data.get("champion_level", ""),
            "team": team,
            "result": result_num,
            "match_length_mins": match_length_mins,
            "kill": kills.strip(),
            "death": deaths.strip(),
            "assist": assists.strip(),
            "kda_ratio": kda_ratio,
            "kill_participation": kill_participation,
            "laning": laning_ratio,
            "cs": cs,
            "cs_per_min": cpm,
            "avg_tier": avg_tier_num,
        }

        # Numbered name/champion columns for each side, capped at five slots.
        for prefix, members in (("team", teammates), ("opp", opponents)):
            for slot, member in enumerate(members[:5], 1):
                match_row[f"{prefix}mates{slot}"] = convert_to_displayname(member["name"])
                match_row[f"{prefix}_champ{slot}"] = member["champion"]

        return match_row
    except Exception as e:
        print(f"Error processing match: {e}")
        return None
def get_matches_stats(region, username, max_retries=2):
    """Get match stats for a single player, with a retry mechanism.

    Each attempt starts a new WebDriver, opens the player's op.gg solo-ranked
    page, waits for the matches container, and converts every match element
    into a row via ``extract_match_data``, ``get_players_info``,
    ``get_tooltip_date`` and ``process_match_data``. The driver is closed at
    the end of every attempt, before the back-off pause.

    Args:
        region: Region segment of the op.gg URL (must be truthy).
        username: Summoner name as it appears in the op.gg URL (must be
            truthy).
        max_retries: Number of additional attempts after the first failure.

    Returns:
        pandas.DataFrame with one row per successfully processed match.

    Raises:
        ValueError: If ``region`` or ``username`` is empty.
        RuntimeError: If every attempt fails (the message carries the
            collected attempt log and the last failure is chained as the
            cause), or if no attempt is made because ``max_retries`` is
            negative.
    """
    print("=========================== inside get_matches_stats ===========================\n")
    if not region or not username:
        raise ValueError("Both 'region' and 'username' must be provided")

    attempt_details = []  # To collect detailed logs for debugging
    retry_count = 0

    while retry_count <= max_retries:
        # Reset per attempt: if setup_driver() fails on a retry, the finally
        # block must not try to quit the previous (already closed) driver.
        driver = None
        try:
            # Initialize the WebDriver
            attempt_details.append("Setting up WebDriver...")
            driver = setup_driver()
            driver.set_page_load_timeout(20)  # Set page load timeout
            attempt_details.append("WebDriver setup complete.")

            # Construct the URL
            url = f"https://www.op.gg/summoners/{region}/{username}?queue_type=SOLORANKED"
            attempt_details.append(f"Accessing URL: {url}")
            driver.get(url)

            # Wait for matches container to load
            attempt_details.append("Waiting for matches container...")
            matches_container = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-1jxewmm.ek41ybw0"))
            )
            attempt_details.append("Matches container found.")

            # Find match elements
            attempt_details.append("Finding match elements...")
            match_elements = matches_container.find_elements(By.CSS_SELECTOR, "div.css-j7qwjs.ery81n90")
            attempt_details.append(f"Found {len(match_elements)} matches.")

            matches_data = []
            # Process each match
            for i, match in enumerate(match_elements, 1):
                attempt_details.append(f"Processing match {i}...")
                try:
                    # Extract data for the match
                    match_data = extract_match_data(match)
                    attempt_details.append(f"Extracted match data for match {i}: {match_data}")

                    # Get player info
                    players = get_players_info(match)
                    attempt_details.append(f"Extracted players info for match {i}: {players}")

                    # Get match date
                    tooltip_element = match.find_element(By.CSS_SELECTOR, "div.time-stamp > div")
                    match_date = get_tooltip_date(driver, tooltip_element)
                    match_data['match_date'] = match_date
                    attempt_details.append(f"Extracted match date for match {i}: {match_date}")

                    # Process and validate match data
                    processed_data = process_match_data(match_data, username, players)
                    if processed_data:
                        matches_data.append(processed_data)
                        attempt_details.append(f"Processed match data for match {i}: {processed_data}")
                    else:
                        attempt_details.append(f"Processed match {i} returned no valid data.")
                except Exception as match_error:
                    # Abort this attempt, keeping the original error as cause.
                    raise RuntimeError(f"Error processing match {i}: {match_error}") from match_error

            if not matches_data:
                raise RuntimeError("No valid matches found")

            print("=========================== Exiting get_matches_stats successfully ===========================\n")
            return pd.DataFrame(matches_data)

        except Exception as e:
            retry_count += 1
            attempt_details.append(f"Attempt {retry_count} failed: {e}")
            if retry_count > max_retries:
                attempt_details.append("Max retries reached. No data retrieved.")
                error_log = "\n".join(attempt_details)
                raise RuntimeError(
                    f"get_matches_stats failed after {max_retries} retries:\n{error_log}"
                ) from e
            attempt_details.append(f"Retrying... ({retry_count}/{max_retries})")

        finally:
            if driver is not None:
                attempt_details.append("Closing WebDriver...")
                try:
                    driver.quit()
                    attempt_details.append("WebDriver closed.")
                except Exception as quit_error:
                    # A failed shutdown must not replace the real outcome
                    # (result or error) of this attempt.
                    attempt_details.append(f"WebDriver close failed: {quit_error}")

        # Reached only when a retry is pending; the browser is already
        # closed, so it is not held open during the back-off.
        time.sleep(5)  # Wait 5 seconds before retrying

    # Only reachable when no attempt was made (negative max_retries).
    error_log = "\n".join(attempt_details)
    raise RuntimeError(f"Exiting get_matches_stats with no data:\n{error_log}")
def get_multiple_matches_stats(players_df):
    """Get match stats for multiple players from a DataFrame.

    Players already present in an existing checkpoint file (matched on its
    'player_id' column) are skipped. Each remaining player is scraped with
    ``get_matches_stats``; failures are recorded and do not stop the run.
    Results are written under ``util/data``:

    * ``recent_matches_checkpoint.csv`` - periodic checkpoint, removed after
      a successful run,
    * ``recent_matches.csv`` - combined results,
    * ``recent_matches_error.csv`` - one row per failed player, written
      whenever there are failures, even if no matches were collected.

    Parameters:
        players_df: DataFrame with columns 'region' and 'username'

    Returns:
        Combined pandas.DataFrame of all collected matches, or ``None`` when
        nothing was collected.
    """
    save_dir = "util/data"
    os.makedirs(save_dir, exist_ok=True)
    checkpoint_file = os.path.join(save_dir, "recent_matches_checkpoint.csv")

    all_matches_dfs = []
    error_players = []

    # Load checkpoint if exists
    if os.path.exists(checkpoint_file):
        try:
            checkpoint_df = pd.read_csv(checkpoint_file)
            processed_players = set(checkpoint_df['player_id'])
            # Filter out already processed players
            players_df = players_df[~players_df['username'].isin(processed_players)]
            # Adopt the checkpoint only once the filter succeeded, so reused
            # data and skipped players always stay in sync.
            all_matches_dfs = [checkpoint_df]
            print(f"Loaded checkpoint with {len(processed_players)} players already processed")
        except Exception as e:
            print(f"Error loading checkpoint: {e}")

    total_players = len(players_df)
    print(f"Processing matches for {total_players} remaining players...")

    # enumerate() gives the true position; the iterrows() index label is not
    # contiguous after filtering and need not even be an integer.
    for position, (_, row) in enumerate(players_df.iterrows()):
        region = row['region']
        username = row['username']
        # Reset per player so an error record never reuses the previous
        # player's formatted name.
        formatted_username = None
        try:
            region = region.lower()  # Ensure region is lowercase
            formatted_username = format_summoner_name(username)
            print(f"\nProcessing matches for player {position + 1}/{total_players}: {username} ({region})")

            # Add delay between requests
            if position > 0:
                time.sleep(2)

            matches_df = get_matches_stats(region, formatted_username)
            if matches_df is not None and not matches_df.empty:
                # Add player identification columns
                matches_df['player_id'] = username  # Original username
                matches_df['region'] = region
                all_matches_dfs.append(matches_df)
                print(f"Successfully processed matches for {username}")

                # Save checkpoint every 5 accumulated frames
                if len(all_matches_dfs) % 5 == 0:
                    checkpoint_save = pd.concat(all_matches_dfs, ignore_index=True)
                    checkpoint_save.to_csv(checkpoint_file, index=False)
                    print(f"Saved checkpoint after processing {len(all_matches_dfs)} players")
            else:
                print(f"No match data found for {username}")
                error_players.append({
                    'region': region,
                    'username': username,
                    'formatted_username': formatted_username,
                    'error': 'No match data found'
                })
        except Exception as e:
            print(f"Error processing matches for {username}: {e}")
            error_players.append({
                'region': region,
                'username': username,
                'formatted_username': (formatted_username if formatted_username is not None
                                       else 'Error in formatting'),
                'error': str(e)
            })

    # Save error log if any errors occurred, even when every player failed.
    if error_players:
        error_filepath = os.path.join(save_dir, "recent_matches_error.csv")
        pd.DataFrame(error_players).to_csv(error_filepath, index=False)
        print(f"Saved error log to {error_filepath}")

    if not all_matches_dfs:
        print("\nNo match data was collected")
        return None

    # Combine all match stats
    final_df = pd.concat(all_matches_dfs, ignore_index=True)
    filepath = os.path.join(save_dir, "recent_matches.csv")
    final_df.to_csv(filepath, index=False)
    print(f"\nSaved combined match stats for {len(all_matches_dfs)} players to {filepath}")

    # Clean up checkpoint file
    if os.path.exists(checkpoint_file):
        os.remove(checkpoint_file)
        print("Removed checkpoint file after successful completion")

    # Print summary
    print("\nSummary:")
    print(f"Total players processed: {total_players}")
    print(f"Successful: {len(all_matches_dfs)}")
    print(f"Failed: {len(error_players)}")
    print(f"Total matches collected: {len(final_df)}")
    return final_df