"""Scrape per-role champion statistics tables from op.gg into a DataFrame.

The public entry point is :func:`get_meta_stats`, which visits one page per
role in ``ROLES``, collects one record per table row and writes the combined
result to ``util/data/meta_stats.csv``.
"""

import os
import re

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.core.os_manager import ChromeType

# Constants
ROLES = ["top", "jungle", "mid", "adc", "support"]
BASE_URL = "https://www.op.gg/champions?position={role}"
TIER_COLOR_MAPPING = {
    "#0093FF": 1,  # Blue
    "#00BBA3": 2,  # Teal
    "#FFB900": 3,  # Yellow
    "#9AA4AF": 4,  # Gray
}

# Tier assigned when no path in the tier icon has a fill colour listed in
# TIER_COLOR_MAPPING (or when the icon is missing altogether).
_DEFAULT_TIER = 5

# A data row is indexed up to cols[7] (the counters column), so anything
# shorter cannot be parsed and is skipped.
_MIN_COLUMNS = 8

# Seconds to wait for the champion table to appear after navigation.
_TABLE_WAIT_SECONDS = 20

_TABLE_SELECTOR = (
    "#content-container > div.flex.gap-2.md\\:mx-auto.md\\:w-width-limit.mt-2.flex-col.overflow-hidden > div.flex.flex-row-reverse.gap-2 > main > div:nth-child(2) > table"
)

# First decimal number in a string; never matches a lone "." or "1.2.3",
# so the match is always safe to hand to float().
_NUMBER_RE = re.compile(r"\d*\.?\d+")

_CHROME_ARGUMENTS = (
    "--headless",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--disable-gpu",
    "--disable-extensions",
    "--disable-logging",
    "--log-level=3",
    "--silent",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
)


def setup_driver():
    """Create a headless Chrome WebDriver.

    The chromedriver binary is resolved through ``webdriver_manager``. When
    the ``HF_SPACE`` environment variable is set the default
    ``ChromeDriverManager`` is used; otherwise the Chromium variant is
    requested.

    Returns:
        A ``webdriver.Chrome`` instance. The caller owns it and must call
        ``quit()`` when done.
    """
    chrome_options = Options()
    for argument in _CHROME_ARGUMENTS:
        chrome_options.add_argument(argument)

    if "HF_SPACE" in os.environ:
        print("Running on Hugging Face Space.")
        chromedriver_path = ChromeDriverManager().install()
    else:
        print("Running chrome webdriver.")
        chromedriver_path = ChromeDriverManager(
            chrome_type=ChromeType.CHROMIUM
        ).install()

    service = Service(executable_path=chromedriver_path)
    return webdriver.Chrome(service=service, options=chrome_options)


def parse_rate(rate_str) -> float:
    """Convert a percentage string such as ``"51.2%"`` to a fraction.

    Args:
        rate_str: Text to parse; surrounding whitespace and trailing ``%``
            characters are ignored.

    Returns:
        The value divided by 100, or ``0.0`` when ``rate_str`` is not a
        string or does not contain a parseable number.
    """
    try:
        return float(rate_str.strip().rstrip("%")) / 100
    except (AttributeError, TypeError, ValueError):
        return 0.0


def extract_counter_champions(counter_column) -> list:
    """Read up to three counter champion names from a table cell.

    Names are taken from the ``alt`` attribute of the ``<img>`` inside each
    of the first three ``<a>`` elements of the cell.

    Args:
        counter_column: The element holding the counter links.

    Returns:
        A list of exactly three strings, padded with ``""`` when fewer than
        three names could be read.
    """
    counter_champions = []
    try:
        for anchor in counter_column.find_elements(By.TAG_NAME, "a")[:3]:
            image = anchor.find_element(By.TAG_NAME, "img")
            # A missing alt attribute is reported as None; store "" so the
            # result is uniformly made of strings, like the padding below.
            counter_champions.append(image.get_attribute("alt") or "")
    except WebDriverException:
        # Counters are best-effort: keep whatever was read before the lookup
        # failed and pad the remainder.
        pass
    return counter_champions + [""] * (3 - len(counter_champions))


def _extract_tier(tier_cell) -> int:
    """Map the fill colour of the cell's first svg icon to a tier number."""
    icons = tier_cell.find_elements(By.TAG_NAME, "svg")
    if not icons:
        return _DEFAULT_TIER
    for path in icons[0].find_elements(By.TAG_NAME, "path"):
        # Mapping keys are upper-case hex; normalise so "#0093ff" also hits.
        fill_color = (path.get_attribute("fill") or "").upper()
        if fill_color in TIER_COLOR_MAPPING:
            return TIER_COLOR_MAPPING[fill_color]
    return _DEFAULT_TIER


def _extract_ban_rate(ban_cell) -> float:
    """Return the first number in the cell's innerHTML as a fraction."""
    ban_rate_html = (ban_cell.get_attribute("innerHTML") or "").strip()
    ban_rate_match = _NUMBER_RE.search(ban_rate_html)
    return float(ban_rate_match.group()) / 100 if ban_rate_match else 0.0


def get_champion_table_data(driver, url, role) -> list:
    """Load a role page and convert its champion table to records.

    Args:
        driver: WebDriver used to load ``url``.
        url: Address of the role page.
        role: Role label stored in every returned record.

    Returns:
        A list of dicts with the keys ``rank``, ``champion``, ``tier``,
        ``role``, ``win_rate``, ``pick_rate``, ``ban_rate``, ``counter1``,
        ``counter2`` and ``counter3``. Rows with fewer than eight cells
        (header or filler rows) are skipped. If loading or parsing fails the
        error is printed and an empty list is returned.
    """
    try:
        driver.get(url)
        table = WebDriverWait(driver, _TABLE_WAIT_SECONDS).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, _TABLE_SELECTOR))
        )

        champions_data = []
        for row in table.find_elements(By.TAG_NAME, "tr"):
            cols = row.find_elements(By.TAG_NAME, "td")
            if len(cols) < _MIN_COLUMNS:
                continue

            counter1, counter2, counter3 = extract_counter_champions(cols[7])
            champions_data.append({
                "rank": cols[0].text.strip(),
                "champion": cols[1].text.strip(),
                "tier": _extract_tier(cols[2]),
                "role": role,
                "win_rate": parse_rate(cols[4].text),
                "pick_rate": parse_rate(cols[5].text),
                "ban_rate": _extract_ban_rate(cols[6]),
                "counter1": counter1,
                "counter2": counter2,
                "counter3": counter3,
            })
        return champions_data
    except Exception as e:
        # Boundary handler: one failing role must not abort the whole scrape.
        print(f"Error extracting table data for {role}: {e}")
        return []


def get_meta_stats() -> pd.DataFrame:
    """Scrape every role in ``ROLES`` and save the combined table as CSV.

    The CSV is written to ``util/data/meta_stats.csv`` (the directory is
    created if needed). The WebDriver is always shut down before returning.

    Returns:
        A DataFrame with one row per scraped champion/role, or an empty
        DataFrame when nothing was collected or an error occurred.
    """
    driver = None
    print("================== inside get_meta_stats ========================\n")
    try:
        driver = setup_driver()

        all_roles_data = []
        for role in ROLES:
            role_url = BASE_URL.format(role=role)
            all_roles_data.extend(get_champion_table_data(driver, role_url, role))

        if not all_roles_data:
            print("No data was collected from any role")
            return pd.DataFrame()

        df = pd.DataFrame(all_roles_data)

        # Save data
        save_dir = os.path.join("util", "data")
        os.makedirs(save_dir, exist_ok=True)
        filepath = os.path.join(save_dir, "meta_stats.csv")
        df.to_csv(filepath, index=False)
        print(f"Saved meta stats to {filepath}")
        print("================== Exiting get_meta_stats ========================\n")
        return df
    except Exception as e:
        print(f"Error in get_meta_stats: {e}")
        return pd.DataFrame()
    finally:
        if driver is not None:
            driver.quit()