James McCool committed on
Commit
b8a8ac9
·
1 Parent(s): b01e348

Refactor the reassess_edge function in reassess_edge.py for efficiency and clarity: concatenate the modified and base frames, run predict_dupes on the combined frame, and extract the updated rows as the first N rows (N being the length of the modified frame) instead of matching them back with nested row-by-row loops.

Browse files
Files changed (1) hide show
  1. global_func/reassess_edge.py +9 -30
global_func/reassess_edge.py CHANGED
@@ -3,8 +3,8 @@ from global_func.predict_dupes import predict_dupes
3
 
4
  def reassess_edge(modified_frame: pd.DataFrame, base_frame: pd.DataFrame, maps_dict: dict, site_var: str, type_var: str, Contest_Size: int, strength_var: str, sport_var: str) -> pd.DataFrame:
5
  """
6
- Reassess edge by re-inserting modified rows into the base frame, running predict_dupes,
7
- and then extracting the updated modified rows.
8
 
9
  Args:
10
  modified_frame: DataFrame with rows that were modified by exposure_spread
@@ -19,37 +19,16 @@ def reassess_edge(modified_frame: pd.DataFrame, base_frame: pd.DataFrame, maps_d
19
  Returns:
20
  DataFrame: Updated modified_frame with recalculated metrics
21
  """
22
- # Create a copy of the base frame
23
- combined_frame = base_frame.copy()
24
 
25
- # Get the player columns (excluding salary, median, Own)
26
- player_columns = [col for col in modified_frame.columns if col not in ['salary', 'median', 'Own']]
27
-
28
- # For each modified row, find and replace the corresponding row in the combined frame
29
- for idx, modified_row in modified_frame.iterrows():
30
- # Find the row in combined_frame that matches the player composition
31
- for combined_idx, combined_row in combined_frame.iterrows():
32
- if all(modified_row[col] == combined_row[col] for col in player_columns):
33
- # Replace the row with the modified version
34
- combined_frame.loc[combined_idx] = modified_row
35
- break
36
 
37
  # Run predict_dupes on the combined frame
38
  updated_combined_frame = predict_dupes(combined_frame, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var)
39
 
40
- # Extract the updated modified rows
41
- updated_modified_rows = []
42
- for idx, modified_row in modified_frame.iterrows():
43
- for combined_idx, combined_row in updated_combined_frame.iterrows():
44
- if all(modified_row[col] == combined_row[col] for col in player_columns):
45
- updated_modified_rows.append(combined_row)
46
- break
47
 
48
- # Convert back to DataFrame
49
- if updated_modified_rows:
50
- result_frame = pd.DataFrame(updated_modified_rows)
51
- result_frame = result_frame.reset_index(drop=True)
52
- return result_frame
53
- else:
54
- # If no matches found, return the original modified_frame
55
- return modified_frame
 
3
 
4
  def reassess_edge(modified_frame: pd.DataFrame, base_frame: pd.DataFrame, maps_dict: dict, site_var: str, type_var: str, Contest_Size: int, strength_var: str, sport_var: str) -> pd.DataFrame:
5
  """
6
+ Reassess edge by concatenating modified frame with base frame, running predict_dupes,
7
+ and then extracting the first N rows (where N is the length of modified_frame).
8
 
9
  Args:
10
  modified_frame: DataFrame with rows that were modified by exposure_spread
 
19
  Returns:
20
  DataFrame: Updated modified_frame with recalculated metrics
21
  """
22
+ # Store the number of rows in the modified frame
23
+ num_modified_rows = len(modified_frame)
24
 
25
+ # Concatenate the modified frame with the base frame
26
+ combined_frame = pd.concat([modified_frame, base_frame], ignore_index=True)
 
 
 
 
 
 
 
 
 
27
 
28
  # Run predict_dupes on the combined frame
29
  updated_combined_frame = predict_dupes(combined_frame, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var)
30
 
31
+ # Extract the first N rows (which correspond to our modified frame)
32
+ result_frame = updated_combined_frame.head(num_modified_rows).copy()
 
 
 
 
 
33
 
34
+ return result_frame