import os
import time
from urllib.parse import unquote

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.core.os_manager import ChromeType

from helper import (
    convert_percentage_to_decimal,
    convert_result_to_binary,
    convert_tier_to_number,
    convert_to_displayname,
    convert_to_minutes,
    format_summoner_name,
)


def setup_driver():
    """Build and return a headless Chrome WebDriver.

    The driver is configured with notification/image content-setting prefs,
    Chrome logging switched off, a fixed 1920x1080 window and a custom
    user-agent string. The chromedriver binary is resolved through
    ``webdriver_manager``.

    Returns:
        A ``selenium.webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()``.
    """
    options = Options()
    # NOTE(review): the value 2 presumably means "block" for both
    # notifications and images - confirm against Chrome's pref docs.
    prefs = {
        'profile.default_content_setting_values': {'notifications': 2},
        'profile.managed_default_content_settings': {'images': 2},
    }
    options.add_experimental_option('prefs', prefs)
    options.add_experimental_option('excludeSwitches', ['enable-logging'])

    for arg in (
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--window-size=1920,1080',
    ):
        options.add_argument(arg)
    options.add_argument(
        'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124'
    )

    # Pick the chromedriver flavour depending on whether we are running in
    # a Hugging Face Space (HF_SPACE set) or locally.
    if 'HF_SPACE' in os.environ:
        print("Running on Hugging Face Space.")
        chromedriver_path = ChromeDriverManager().install()
    else:
        print("Running chrome webdriver.")
        chromedriver_path = ChromeDriverManager(
            chrome_type=ChromeType.CHROMIUM
        ).install()

    service = Service(executable_path=chromedriver_path)
    return webdriver.Chrome(service=service, options=options)


def get_tooltip_date(driver, element):
    """Hover over *element* and return the text of the visible tooltip.

    Existing ``span.react-tooltip-lite`` nodes are removed first so that the
    tooltip found afterwards belongs to *element*.

    Parameters:
        driver: WebDriver used to run the JavaScript.
        element: WebElement that receives the synthetic ``mouseover``.

    Returns:
        The tooltip text, or ``None`` when no visible tooltip is found or the
        script fails (best effort - errors are deliberately not raised).
    """
    try:
        driver.execute_script(
            """
            arguments[0].scrollIntoView({block: 'center'});
            document.querySelectorAll('span.react-tooltip-lite').forEach(e => e.remove());
            arguments[0].dispatchEvent(new MouseEvent('mouseover', {
                view: window,
                bubbles: true,
                cancelable: true
            }));
            """,
            element,
        )
        # Give the page a moment to render the tooltip.
        time.sleep(0.3)
        return driver.execute_script(
            """
            return Array.from(document.querySelectorAll('span.react-tooltip-lite'))
                .find(t => t.offsetParent !== null)?.textContent || null;
            """
        )
    except Exception:
        # Was a bare ``except:``; narrowed so Ctrl-C / SystemExit propagate.
        return None


def extract_match_data(match):
    """Read the raw text fields of one match card.

    Parameters:
        match: WebElement of a single match row.

    Returns:
        dict mapping field name to raw text (``champion_img`` holds the
        image's ``alt`` attribute, ``laning`` has newlines stripped). A field
        whose element cannot be read is omitted; the remaining fields are
        still collected.
    """
    selectors = {
        'time_stamp': "div.time-stamp > div",
        'game_type': "div.game-type",
        'result': "div.result",
        'length': "div.length",
        'kda': "div.kda",
        'kda_ratio': "div.kda-ratio",
        'cs': "div.cs",
        'avg_tier': "div.avg-tier",
        'laning': "div.laning",
        'kill_participation': "div.p-kill",
        'champion_img': "div.info a.champion img",
        'champion_level': "div.info a.champion span.champion-level",
    }

    data = {}
    for key, selector in selectors.items():
        # Handle each field on its own: previously the first missing element
        # aborted the loop and silently dropped every later field.
        try:
            element = match.find_element(By.CSS_SELECTOR, selector)
            if key == 'champion_img':
                data[key] = element.get_attribute('alt')
            elif key == 'laning':
                data[key] = element.text.replace('\n', '')
            else:
                data[key] = element.text
        except Exception as e:
            print(f"Error extracting match data: {e}")
    return data


def get_players_info(match):
    """Collect champion, region and name for up to ten players of a match.

    The region and name are taken from the last two path segments of each
    player's profile link; the name is URL-decoded.

    Parameters:
        match: WebElement of a single match row.

    Returns:
        list of ``{"champion", "region", "name"}`` dicts in page order, or an
        empty list if anything goes wrong while reading the players.
    """
    try:
        players = []
        player_elements = match.find_elements(
            By.CSS_SELECTOR, "div.css-pp7uqb.e1xevas21"
        )[:10]
        for player in player_elements:
            champion = player.find_element(
                By.CSS_SELECTOR, "div.icon img"
            ).get_attribute("alt")
            href = player.find_element(
                By.CSS_SELECTOR, "div.name a"
            ).get_attribute("href")
            region, name = href.split('/')[-2:]
            players.append({
                "champion": champion,
                "region": region,
                "name": unquote(name),
            })
        return players
    except Exception as e:
        print(f"Error getting players info: {e}")
        return []


def convert_laning_ratio(laning_str):
    """Convert laning string (e.g., 'Laning 70:30') to decimal ratio.

    Returns ``first / second`` rounded to two decimals, or ``0.0`` when the
    second number is zero or the string cannot be parsed.
    """
    try:
        # Keep only the part after the 'Laning' label, e.g. '70:30'.
        ratio_part = laning_str.split('Laning')[-1].strip()
        first_num, second_num = map(int, ratio_part.split(':'))
        if second_num != 0:  # Avoid division by zero
            return round(first_num / second_num, 2)
        return 0.0
    except Exception as e:
        print(f"Laning conversion error for '{laning_str}': {e}")
        return 0.0


def extract_cs_number(cs_str):
    """Extract pure CS number from string (e.g., 'CS 123 (7.9)' -> 123).

    Returns ``0`` when no digits precede the parenthesis or the input is not
    a usable string.
    """
    try:
        # Only look at the text before '(' so the per-minute value is ignored.
        digits = ''.join(filter(str.isdigit, cs_str.split('(')[0]))
        return int(digits) if digits else 0
    except (AttributeError, TypeError, ValueError):
        return 0


def extract_cs_per_min(cs_str):
    """Extract CS per minute from string (e.g., 'CS 123 (7.9)' -> 7.9).

    Returns ``0.0`` when there is no parenthesised number or the input is not
    a usable string.
    """
    try:
        return float(cs_str.split('(')[1].split(')')[0])
    except (AttributeError, IndexError, TypeError, ValueError):
        return 0.0


def process_match_data(match_data, username, players):
    """Turn raw scraped match fields into one flat result row.

    Parameters:
        match_data: dict as produced by ``extract_match_data`` (plus an
            optional ``match_date`` entry).
        username: name of the player the row is built for.
        players: list of player dicts as produced by ``get_players_info``.
            Indices 0-4 are treated as the blue team, 5 and up as red.

    Returns:
        dict with the player's stats plus ``teammates{n}`` / ``team_champ{n}``
        and ``oppmates{n}`` / ``opp_champ{n}`` entries, or ``None`` when the
        player is not in *players* or processing fails.
    """
    try:
        display_name = convert_to_displayname(username)

        def normalise(name):
            # Compare names case-insensitively and ignoring spaces.
            return name.lower().replace(' ', '')

        target = normalise(display_name)
        player_index = next(
            (
                i for i, p in enumerate(players)
                if normalise(convert_to_displayname(p['name'])) == target
            ),
            -1,
        )

        if player_index == -1:
            print(f"\nWarning: Player {display_name} not found in players list")
            print(
                "Available players:",
                [convert_to_displayname(p['name']) for p in players],
            )
            return None

        on_blue = player_index < 5
        team = "blue" if on_blue else "red"
        if on_blue:
            own_side, opponents = players[:5], players[5:]
            own_index = player_index
        else:
            own_side, opponents = players[5:], players[:5]
            own_index = player_index - 5
        # Exclude the player by position rather than by name.
        teammates = [p for i, p in enumerate(own_side) if i != own_index]

        kda_parts = match_data.get('kda', '0/0/0').strip().split('/')
        kills, deaths, assists = [
            kda_parts[i] if i < len(kda_parts) else "0" for i in range(3)
        ]

        kda_ratio = match_data.get("kda_ratio", "0").strip().replace(":1 KDA", "")
        kill_participation = convert_percentage_to_decimal(
            match_data.get("kill_participation", "0%")
        )
        laning_ratio = convert_laning_ratio(match_data.get("laning", "0:0"))
        cs_text = match_data.get("cs", "0")
        cs = extract_cs_number(cs_text)
        cpm = extract_cs_per_min(cs_text)
        match_length_mins = convert_to_minutes(match_data.get("length", "0m 0s"))
        avg_tier_num = convert_tier_to_number(match_data.get("avg_tier", ""))
        result_num = convert_result_to_binary(match_data.get("result", ""))

        match_row = {
            "player_id": display_name,
            "date": match_data.get("match_date", ""),
            "champion": match_data.get("champion_img", ""),
            "level": match_data.get("champion_level", ""),
            "team": team,
            "result": result_num,
            "match_length_mins": match_length_mins,
            "kill": kills.strip(),
            "death": deaths.strip(),
            "assist": assists.strip(),
            "kda_ratio": kda_ratio,
            "kill_participation": kill_participation,
            "laning": laning_ratio,
            "cs": cs,
            "cs_per_min": cpm,
            "avg_tier": avg_tier_num,
        }

        # Column names ("teammates1", "team_champ1", "oppmates1",
        # "opp_champ1", ...) are part of the output schema - keep as is.
        for team_list, prefix in ((teammates, "team"), (opponents, "opp")):
            # Never emit more than 5 players per side.
            for j, player in enumerate(team_list[:5], 1):
                match_row[f"{prefix}mates{j}"] = convert_to_displayname(player["name"])
                match_row[f"{prefix}_champ{j}"] = player["champion"]

        return match_row

    except Exception as e:
        print(f"Error processing match: {e}")
        return None


def _collect_match_rows(driver, match_elements, username, attempt_details):
    """Process every match element and return the list of valid rows.

    Appends progress lines to *attempt_details*. Any failure while handling a
    match is re-raised as ``RuntimeError`` naming the match number.
    """
    matches_data = []
    for i, match in enumerate(match_elements, 1):
        attempt_details.append(f"Processing match {i}...")
        try:
            match_data = extract_match_data(match)
            attempt_details.append(
                f"Extracted match data for match {i}: {match_data}"
            )

            players = get_players_info(match)
            attempt_details.append(
                f"Extracted players info for match {i}: {players}"
            )

            tooltip_element = match.find_element(
                By.CSS_SELECTOR, "div.time-stamp > div"
            )
            match_date = get_tooltip_date(driver, tooltip_element)
            match_data['match_date'] = match_date
            attempt_details.append(
                f"Extracted match date for match {i}: {match_date}"
            )

            processed_data = process_match_data(match_data, username, players)
            if processed_data:
                matches_data.append(processed_data)
                attempt_details.append(
                    f"Processed match data for match {i}: {processed_data}"
                )
            else:
                attempt_details.append(
                    f"Processed match {i} returned no valid data."
                )
        except Exception as match_error:
            raise RuntimeError(
                f"Error processing match {i}: {match_error}"
            ) from match_error
    return matches_data


def get_matches_stats(region, username, max_retries=2):
    """
    Get match stats for a single player with retry mechanism

    Parameters:
        region: op.gg region segment used in the URL.
        username: summoner name as it should appear in the URL.
        max_retries: number of additional attempts after the first failure.

    Returns:
        DataFrame with one row per successfully processed match.

    Raises:
        ValueError: if *region* or *username* is empty.
        RuntimeError: if every attempt fails; the message contains the
            collected attempt log.
    """
    print("=========================== inside get_matches_stats ===========================\n")

    if not region or not username:
        raise ValueError("Both 'region' and 'username' must be provided")

    attempt_details = []  # Detailed log, included in the final error message
    retry_count = 0

    while retry_count <= max_retries:
        # Reset per attempt so a failed setup never re-quits an old driver.
        driver = None
        try:
            attempt_details.append("Setting up WebDriver...")
            driver = setup_driver()
            driver.set_page_load_timeout(20)
            attempt_details.append("WebDriver setup complete.")

            url = f"https://www.op.gg/summoners/{region}/{username}?queue_type=SOLORANKED"
            attempt_details.append(f"Accessing URL: {url}")
            driver.get(url)

            attempt_details.append("Waiting for matches container...")
            matches_container = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "div.css-1jxewmm.ek41ybw0")
                )
            )
            attempt_details.append("Matches container found.")

            attempt_details.append("Finding match elements...")
            match_elements = matches_container.find_elements(
                By.CSS_SELECTOR, "div.css-j7qwjs.ery81n90"
            )
            attempt_details.append(f"Found {len(match_elements)} matches.")

            matches_data = _collect_match_rows(
                driver, match_elements, username, attempt_details
            )

            if not matches_data:
                raise RuntimeError("No valid matches found")

            print("=========================== Exiting get_matches_stats successfully ===========================\n")
            return pd.DataFrame(matches_data)

        except Exception as e:
            retry_count += 1
            attempt_details.append(f"Attempt {retry_count} failed: {e}")
            if retry_count > max_retries:
                attempt_details.append("Max retries reached. No data retrieved.")
                error_log = "\n".join(attempt_details)
                raise RuntimeError(
                    f"get_matches_stats failed after {max_retries} retries:\n{error_log}"
                ) from e
            attempt_details.append(f"Retrying... ({retry_count}/{max_retries})")

        finally:
            if driver:
                attempt_details.append("Closing WebDriver...")
                try:
                    driver.quit()
                    attempt_details.append("WebDriver closed.")
                except Exception as quit_error:
                    # A failing quit must not mask the real result/exception.
                    attempt_details.append(
                        f"WebDriver close failed: {quit_error}"
                    )

        # Only reached when another attempt follows; the browser is already
        # closed at this point, so it is not kept open during the back-off.
        time.sleep(5)

    # Only reachable when the loop never ran (negative max_retries).
    error_log = "\n".join(attempt_details)
    raise RuntimeError(f"Exiting get_matches_stats with no data:\n{error_log}")


def _save_error_log(error_players, save_dir):
    """Write the per-player error records to ``recent_matches_error.csv``."""
    error_filepath = os.path.join(save_dir, "recent_matches_error.csv")
    pd.DataFrame(error_players).to_csv(error_filepath, index=False)
    print(f"Saved error log to {error_filepath}")


def get_multiple_matches_stats(players_df):
    """
    Get match stats for multiple players from a DataFrame

    Results are written to ``util/data/recent_matches.csv``; failures are
    written to ``util/data/recent_matches_error.csv``. A checkpoint CSV is
    saved every 5 newly processed players and removed on completion; if one
    exists at start-up, players whose ``player_id`` is already in it are
    skipped.

    Parameters:
        players_df: DataFrame with columns 'region' and 'username'

    Returns:
        Combined DataFrame of all collected matches, or ``None`` when nothing
        was collected.
    """
    save_dir = "util/data"
    os.makedirs(save_dir, exist_ok=True)
    checkpoint_file = os.path.join(save_dir, "recent_matches_checkpoint.csv")

    all_matches_dfs = []
    error_players = []
    resumed_count = 0   # players restored from the checkpoint
    success_count = 0   # players scraped successfully in this run

    if os.path.exists(checkpoint_file):
        try:
            checkpoint_df = pd.read_csv(checkpoint_file)
            processed_players = set(checkpoint_df['player_id'])
            all_matches_dfs = [checkpoint_df]
            resumed_count = len(processed_players)
            # Skip players that are already in the checkpoint.
            players_df = players_df[~players_df['username'].isin(processed_players)]
            print(f"Loaded checkpoint with {len(processed_players)} players already processed")
        except Exception as e:
            print(f"Error loading checkpoint: {e}")

    print(f"Processing matches for {len(players_df)} remaining players...")

    # Use a running position, not the index label: after checkpoint
    # filtering (or with a non-default index) the label is not a position.
    for position, (_, row) in enumerate(players_df.iterrows(), start=1):
        username = row['username']
        region = row['region']
        # Reset per player so an error record never shows a stale value
        # from the previous iteration.
        formatted_username = None

        try:
            region = region.lower()  # Ensure region is lowercase
            formatted_username = format_summoner_name(username)
            print(f"\nProcessing matches for player {position}/{len(players_df)}: {username} ({region})")

            # Add delay between requests
            if position > 1:
                time.sleep(2)

            matches_df = get_matches_stats(region, formatted_username)

            if matches_df is not None and not matches_df.empty:
                # Identify rows by the original (unformatted) username.
                matches_df['player_id'] = username
                matches_df['region'] = region

                all_matches_dfs.append(matches_df)
                success_count += 1
                print(f"Successfully processed matches for {username}")

                # Save checkpoint every 5 newly processed players
                if success_count % 5 == 0:
                    checkpoint_save = pd.concat(all_matches_dfs, ignore_index=True)
                    checkpoint_save.to_csv(checkpoint_file, index=False)
                    print(f"Saved checkpoint after processing {resumed_count + success_count} players")
            else:
                print(f"No match data found for {username}")
                error_players.append({
                    'region': region,
                    'username': username,
                    'formatted_username': formatted_username,
                    'error': 'No match data found',
                })

        except Exception as e:
            print(f"Error processing matches for {username}: {e}")
            error_players.append({
                'region': region,
                'username': username,
                'formatted_username': (
                    formatted_username
                    if formatted_username is not None
                    else 'Error in formatting'
                ),
                'error': str(e),
            })
            continue

    # Save the error log whether or not any data was collected; previously
    # it was lost when every player failed.
    if error_players:
        _save_error_log(error_players, save_dir)

    if not all_matches_dfs:
        print("\nNo match data was collected")
        return None

    final_df = pd.concat(all_matches_dfs, ignore_index=True)
    filepath = os.path.join(save_dir, "recent_matches.csv")
    final_df.to_csv(filepath, index=False)
    print(f"\nSaved combined match stats for {resumed_count + success_count} players to {filepath}")

    # Clean up checkpoint file
    if os.path.exists(checkpoint_file):
        os.remove(checkpoint_file)
        print("Removed checkpoint file after successful completion")

    print("\nSummary:")
    print(f"Total players processed: {len(players_df)}")
    print(f"Successful: {success_count}")
    print(f"Failed: {len(error_players)}")
    print(f"Total matches collected: {len(final_df)}")

    return final_df