Jimin Park committed on
Commit
6d09e09
·
1 Parent(s): adabb71

added model

Browse files
Files changed (1) hide show
  1. util/Recent_match_scrapper.py +57 -26
util/Recent_match_scrapper.py CHANGED
@@ -219,68 +219,99 @@ def process_match_data(match_data, username, players):
219
  return None
220
 
221
  def get_matches_stats(region, username, max_retries=2):
222
- print("====================== Inside get_matches_stats======================= \n")
223
- print("input parameters, region: ", region, " username: ", username)
224
  """
225
  Get match stats for a single player with retry mechanism
226
  """
 
 
 
 
 
227
  driver = None
228
  retry_count = 0
229
-
230
  while retry_count <= max_retries:
231
  try:
 
 
232
  driver = setup_driver()
233
  driver.set_page_load_timeout(20) # Set page load timeout
234
-
 
 
235
  url = f"https://www.op.gg/summoners/{region}/{username}?queue_type=SOLORANKED"
236
- print(f"Accessing URL: {url}")
237
  driver.get(url)
238
 
 
 
239
  matches_container = WebDriverWait(driver, 20).until(
240
  EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-1jxewmm.ek41ybw0"))
241
  )
242
-
243
- matches_data = []
 
 
244
  match_elements = matches_container.find_elements(By.CSS_SELECTOR, "div.css-j7qwjs.ery81n90")
 
 
 
245
 
246
- #print(f"Found {len(match_elements)} matches")
247
-
248
  for i, match in enumerate(match_elements, 1):
 
249
  try:
 
250
  match_data = extract_match_data(match)
 
 
 
251
  players = get_players_info(match)
252
- match_data['match_date'] = get_tooltip_date(
253
- driver,
254
- match.find_element(By.CSS_SELECTOR, "div.time-stamp > div")
255
- )
256
-
 
 
 
 
257
  processed_data = process_match_data(match_data, username, players)
258
  if processed_data:
259
  matches_data.append(processed_data)
260
- except Exception as e:
261
- print(f"Error processing match {i}: {e}")
262
- continue
 
 
 
263
 
 
264
  if matches_data:
265
  return pd.DataFrame(matches_data)
266
  else:
267
- raise Exception("No valid matches found")
268
-
269
  except Exception as e:
270
  retry_count += 1
271
- print(f"Attempt {retry_count} failed: {e}")
272
  if retry_count <= max_retries:
273
- print(f"Retrying... ({retry_count}/{max_retries})")
274
  time.sleep(5) # Wait 5 seconds before retrying
275
  else:
276
- print(f"Max retries reached")
277
- return pd.DataFrame()
278
-
 
279
  finally:
280
  if driver:
 
281
  driver.quit()
282
-
283
- return pd.DataFrame()
 
 
 
284
 
285
  def get_multiple_matches_stats(players_df):
286
  """
 
219
  return None
220
 
221
  def get_matches_stats(region, username, max_retries=2):
 
 
222
  """
223
  Get match stats for a single player with retry mechanism
224
  """
225
+ if not region or not username:
226
+ raise ValueError("Both 'region' and 'username' must be provided")
227
+
228
+ attempt_details = [] # To collect detailed logs for debugging
229
+
230
  driver = None
231
  retry_count = 0
232
+
233
  while retry_count <= max_retries:
234
  try:
235
+ # Initialize the WebDriver
236
+ attempt_details.append("Setting up WebDriver...")
237
  driver = setup_driver()
238
  driver.set_page_load_timeout(20) # Set page load timeout
239
+ attempt_details.append("WebDriver setup complete.")
240
+
241
+ # Construct the URL
242
  url = f"https://www.op.gg/summoners/{region}/{username}?queue_type=SOLORANKED"
243
+ attempt_details.append(f"Accessing URL: {url}")
244
  driver.get(url)
245
 
246
+ # Wait for matches container to load
247
+ attempt_details.append("Waiting for matches container...")
248
  matches_container = WebDriverWait(driver, 20).until(
249
  EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-1jxewmm.ek41ybw0"))
250
  )
251
+ attempt_details.append("Matches container found.")
252
+
253
+ # Find match elements
254
+ attempt_details.append("Finding match elements...")
255
  match_elements = matches_container.find_elements(By.CSS_SELECTOR, "div.css-j7qwjs.ery81n90")
256
+ attempt_details.append(f"Found {len(match_elements)} matches.")
257
+
258
+ matches_data = []
259
 
260
+ # Process each match
 
261
  for i, match in enumerate(match_elements, 1):
262
+ attempt_details.append(f"Processing match {i}...")
263
  try:
264
+ # Extract data for the match
265
  match_data = extract_match_data(match)
266
+ attempt_details.append(f"Extracted match data for match {i}: {match_data}")
267
+
268
+ # Get player info
269
  players = get_players_info(match)
270
+ attempt_details.append(f"Extracted players info for match {i}: {players}")
271
+
272
+ # Get match date
273
+ tooltip_element = match.find_element(By.CSS_SELECTOR, "div.time-stamp > div")
274
+ match_date = get_tooltip_date(driver, tooltip_element)
275
+ match_data['match_date'] = match_date
276
+ attempt_details.append(f"Extracted match date for match {i}: {match_date}")
277
+
278
+ # Process and validate match data
279
  processed_data = process_match_data(match_data, username, players)
280
  if processed_data:
281
  matches_data.append(processed_data)
282
+ attempt_details.append(f"Processed match data for match {i}: {processed_data}")
283
+ else:
284
+ attempt_details.append(f"Processed match {i} returned no valid data.")
285
+
286
+ except Exception as match_error:
287
+ raise RuntimeError(f"Error processing match {i}: {match_error}")
288
 
289
+ # Return DataFrame if matches are found
290
  if matches_data:
291
  return pd.DataFrame(matches_data)
292
  else:
293
+ raise RuntimeError("No valid matches found")
294
+
295
  except Exception as e:
296
  retry_count += 1
297
+ attempt_details.append(f"Attempt {retry_count} failed: {e}")
298
  if retry_count <= max_retries:
299
+ attempt_details.append(f"Retrying... ({retry_count}/{max_retries})")
300
  time.sleep(5) # Wait 5 seconds before retrying
301
  else:
302
+ attempt_details.append("Max retries reached. No data retrieved.")
303
+ error_log = "\n".join(attempt_details)
304
+ raise RuntimeError(f"get_matches_stats failed after {max_retries} retries:\n{error_log}")
305
+
306
  finally:
307
  if driver:
308
+ attempt_details.append("Closing WebDriver...")
309
  driver.quit()
310
+ attempt_details.append("WebDriver closed.")
311
+
312
+ error_log = "\n".join(attempt_details)
313
+ raise RuntimeError(f"Exiting get_matches_stats with no data:\n{error_log}")
314
+
315
 
316
  def get_multiple_matches_stats(players_df):
317
  """