Jimin Park
committed on
Commit
·
6d09e09
1
Parent(s):
adabb71
added model
Browse files- util/Recent_match_scrapper.py +57 -26
util/Recent_match_scrapper.py
CHANGED
@@ -219,68 +219,99 @@ def process_match_data(match_data, username, players):
|
|
219 |
return None
|
220 |
|
221 |
def get_matches_stats(region, username, max_retries=2):
|
222 |
-
print("====================== Inside get_matches_stats======================= \n")
|
223 |
-
print("input parameters, region: ", region, " username: ", username)
|
224 |
"""
|
225 |
Get match stats for a single player with retry mechanism
|
226 |
"""
|
|
|
|
|
|
|
|
|
|
|
227 |
driver = None
|
228 |
retry_count = 0
|
229 |
-
|
230 |
while retry_count <= max_retries:
|
231 |
try:
|
|
|
|
|
232 |
driver = setup_driver()
|
233 |
driver.set_page_load_timeout(20) # Set page load timeout
|
234 |
-
|
|
|
|
|
235 |
url = f"https://www.op.gg/summoners/{region}/{username}?queue_type=SOLORANKED"
|
236 |
-
|
237 |
driver.get(url)
|
238 |
|
|
|
|
|
239 |
matches_container = WebDriverWait(driver, 20).until(
|
240 |
EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-1jxewmm.ek41ybw0"))
|
241 |
)
|
242 |
-
|
243 |
-
|
|
|
|
|
244 |
match_elements = matches_container.find_elements(By.CSS_SELECTOR, "div.css-j7qwjs.ery81n90")
|
|
|
|
|
|
|
245 |
|
246 |
-
#
|
247 |
-
|
248 |
for i, match in enumerate(match_elements, 1):
|
|
|
249 |
try:
|
|
|
250 |
match_data = extract_match_data(match)
|
|
|
|
|
|
|
251 |
players = get_players_info(match)
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
)
|
256 |
-
|
|
|
|
|
|
|
|
|
257 |
processed_data = process_match_data(match_data, username, players)
|
258 |
if processed_data:
|
259 |
matches_data.append(processed_data)
|
260 |
-
|
261 |
-
|
262 |
-
|
|
|
|
|
|
|
263 |
|
|
|
264 |
if matches_data:
|
265 |
return pd.DataFrame(matches_data)
|
266 |
else:
|
267 |
-
raise
|
268 |
-
|
269 |
except Exception as e:
|
270 |
retry_count += 1
|
271 |
-
|
272 |
if retry_count <= max_retries:
|
273 |
-
|
274 |
time.sleep(5) # Wait 5 seconds before retrying
|
275 |
else:
|
276 |
-
|
277 |
-
|
278 |
-
|
|
|
279 |
finally:
|
280 |
if driver:
|
|
|
281 |
driver.quit()
|
282 |
-
|
283 |
-
|
|
|
|
|
|
|
284 |
|
285 |
def get_multiple_matches_stats(players_df):
|
286 |
"""
|
|
|
219 |
return None
|
220 |
|
221 |
def get_matches_stats(region, username, max_retries=2, retry_delay=5):
    """
    Get match stats for a single player with retry mechanism

    Opens the player's solo-ranked op.gg page in a fresh WebDriver, extracts
    every match element found in the matches container and returns the
    processed rows as a DataFrame. Each failed attempt is retried with a new
    WebDriver until ``max_retries`` retries have been used.

    Args:
        region: Region segment of the op.gg summoner URL. Must be truthy.
        username: Summoner name segment of the URL; also passed to
            ``process_match_data``. Must be truthy.
        max_retries: Number of retries after the first attempt, so up to
            ``max_retries + 1`` attempts are made.
        retry_delay: Seconds to sleep between a failed attempt and the next
            one.

    Returns:
        pandas.DataFrame built from the list of truthy results returned by
        ``process_match_data``, one entry per match.

    Raises:
        ValueError: If ``region`` or ``username`` is empty/None.
        RuntimeError: If every attempt failed (the message carries the
            step-by-step attempt log), or if no attempt was made at all
            because ``max_retries`` is negative.
    """
    if not region or not username:
        raise ValueError("Both 'region' and 'username' must be provided")

    attempt_details = []  # To collect detailed logs for debugging
    retry_count = 0

    while retry_count <= max_retries:
        # Reset per attempt: if setup_driver() fails on a retry, the finally
        # clause must not call quit() again on the previous attempt's driver.
        driver = None
        try:
            # Initialize the WebDriver
            attempt_details.append("Setting up WebDriver...")
            driver = setup_driver()
            driver.set_page_load_timeout(20)  # Set page load timeout
            attempt_details.append("WebDriver setup complete.")

            # Construct the URL
            url = f"https://www.op.gg/summoners/{region}/{username}?queue_type=SOLORANKED"
            attempt_details.append(f"Accessing URL: {url}")
            driver.get(url)

            # Wait for matches container to load
            # NOTE(review): the selectors below look like build-generated
            # class names - confirm they still match the live page.
            attempt_details.append("Waiting for matches container...")
            matches_container = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-1jxewmm.ek41ybw0"))
            )
            attempt_details.append("Matches container found.")

            # Find match elements
            attempt_details.append("Finding match elements...")
            match_elements = matches_container.find_elements(By.CSS_SELECTOR, "div.css-j7qwjs.ery81n90")
            attempt_details.append(f"Found {len(match_elements)} matches.")

            matches_data = []

            # Process each match
            for i, match in enumerate(match_elements, 1):
                attempt_details.append(f"Processing match {i}...")
                try:
                    # Extract data for the match
                    match_data = extract_match_data(match)
                    attempt_details.append(f"Extracted match data for match {i}: {match_data}")

                    # Get player info
                    players = get_players_info(match)
                    attempt_details.append(f"Extracted players info for match {i}: {players}")

                    # Get match date
                    tooltip_element = match.find_element(By.CSS_SELECTOR, "div.time-stamp > div")
                    match_date = get_tooltip_date(driver, tooltip_element)
                    match_data['match_date'] = match_date
                    attempt_details.append(f"Extracted match date for match {i}: {match_date}")

                    # Process and validate match data
                    processed_data = process_match_data(match_data, username, players)
                    if processed_data:
                        matches_data.append(processed_data)
                        attempt_details.append(f"Processed match data for match {i}: {processed_data}")
                    else:
                        attempt_details.append(f"Processed match {i} returned no valid data.")

                except Exception as match_error:
                    # A single bad match fails the whole attempt so that the
                    # outer handler retries with a fresh page load.
                    raise RuntimeError(f"Error processing match {i}: {match_error}") from match_error

            # Return DataFrame if matches are found
            if matches_data:
                return pd.DataFrame(matches_data)
            else:
                raise RuntimeError("No valid matches found")

        except Exception as e:
            retry_count += 1
            attempt_details.append(f"Attempt {retry_count} failed: {e}")
            if retry_count <= max_retries:
                attempt_details.append(f"Retrying... ({retry_count}/{max_retries})")
                time.sleep(retry_delay)  # Wait before retrying
            else:
                attempt_details.append("Max retries reached. No data retrieved.")
                error_log = "\n".join(attempt_details)
                raise RuntimeError(
                    f"get_matches_stats failed after {max_retries} retries:\n{error_log}"
                ) from e

        finally:
            # Runs on success, retry and final failure alike, so every
            # attempt releases its own browser instance.
            if driver:
                attempt_details.append("Closing WebDriver...")
                driver.quit()
                attempt_details.append("WebDriver closed.")

    # Only reachable when the loop body never ran (negative max_retries):
    # every executed attempt either returns or raises above.
    error_log = "\n".join(attempt_details)
    raise RuntimeError(f"Exiting get_matches_stats with no data:\n{error_log}")
|
314 |
+
|
315 |
|
316 |
def get_multiple_matches_stats(players_df):
|
317 |
"""
|