James McCool committed on
Commit
8b50a4a
·
1 Parent(s): 66b96f5

Refactor similarity score calculations in stratification_function to use quantiles instead of min/max thresholds, improving accuracy in target similarity score generation.

Browse files
global_func/stratification_function.py CHANGED
@@ -12,8 +12,8 @@ def stratification_function(portfolio: pd.DataFrame, lineup_target: int, exclude
12
  concat_portfolio = concat_portfolio.sort_values(by=sorting_choice, ascending=False).reset_index(drop=True)
13
 
14
  # Calculate target similarity scores for linear progression
15
- similarity_floor = concat_portfolio[sorting_choice].min() + (concat_portfolio[sorting_choice].min() * low_threshold)
16
- similarity_ceiling = concat_portfolio[sorting_choice].max() - (concat_portfolio[sorting_choice].max() * high_threshold)
17
 
18
  # Create evenly spaced target similarity scores
19
  target_similarities = np.linspace(similarity_floor, similarity_ceiling, lineup_target)
 
12
  concat_portfolio = concat_portfolio.sort_values(by=sorting_choice, ascending=False).reset_index(drop=True)
13
 
14
  # Calculate target similarity scores for linear progression
15
+ similarity_floor = concat_portfolio[sorting_choice].quantile(low_threshold / 100)
16
+ similarity_ceiling = concat_portfolio[sorting_choice].quantile(high_threshold / 100)
17
 
18
  # Create evenly spaced target similarity scores
19
  target_similarities = np.linspace(similarity_floor, similarity_ceiling, lineup_target)