MilesCranmer committed on
Commit
505af8d
1 Parent(s): 483a9b8

fix: boolean selection masks in pandas eval

Browse files
Files changed (3) hide show
  1. pysr/export_numpy.py +10 -2
  2. pysr/sr.py +1 -1
  3. pysr/test/test.py +3 -3
pysr/export_numpy.py CHANGED
@@ -1,10 +1,12 @@
1
  """Code for exporting discovered expressions to numpy"""
2
 
3
  import warnings
 
4
 
5
  import numpy as np
6
  import pandas as pd
7
- from sympy import lambdify
 
8
 
9
 
10
  def sympy2numpy(eqn, sympy_symbols, *, selection=None):
@@ -14,6 +16,10 @@ def sympy2numpy(eqn, sympy_symbols, *, selection=None):
14
  class CallableEquation:
15
  """Simple wrapper for numpy lambda functions built with sympy"""
16
 
 
 
 
 
17
  def __init__(self, eqn, sympy_symbols, selection=None):
18
  self._sympy = eqn
19
  self._sympy_symbols = sympy_symbols
@@ -29,8 +35,9 @@ class CallableEquation:
29
  return self._lambda(
30
  **{k: X[k].values for k in map(str, self._sympy_symbols)}
31
  ) * np.ones(expected_shape)
 
32
  if self._selection is not None:
33
- if X.shape[1] != len(self._selection):
34
  warnings.warn(
35
  "`X` should be of shape (n_samples, len(self._selection)). "
36
  "Automatically filtering `X` to selection. "
@@ -38,6 +45,7 @@ class CallableEquation:
38
  "this may lead to incorrect predictions and other errors."
39
  )
40
  X = X[:, self._selection]
 
41
  return self._lambda(*X.T) * np.ones(expected_shape)
42
 
43
  @property
 
1
  """Code for exporting discovered expressions to numpy"""
2
 
3
  import warnings
4
+ from typing import List, Union
5
 
6
  import numpy as np
7
  import pandas as pd
8
+ from numpy.typing import NDArray
9
+ from sympy import Expr, Symbol, lambdify
10
 
11
 
12
  def sympy2numpy(eqn, sympy_symbols, *, selection=None):
 
16
  class CallableEquation:
17
  """Simple wrapper for numpy lambda functions built with sympy"""
18
 
19
+ _sympy: Expr
20
+ _sympy_symbols: List[Symbol]
21
+ _selection: Union[NDArray[np.bool_], None]
22
+
23
  def __init__(self, eqn, sympy_symbols, selection=None):
24
  self._sympy = eqn
25
  self._sympy_symbols = sympy_symbols
 
35
  return self._lambda(
36
  **{k: X[k].values for k in map(str, self._sympy_symbols)}
37
  ) * np.ones(expected_shape)
38
+
39
  if self._selection is not None:
40
+ if X.shape[1] != self._selection.sum():
41
  warnings.warn(
42
  "`X` should be of shape (n_samples, len(self._selection)). "
43
  "Automatically filtering `X` to selection. "
 
45
  "this may lead to incorrect predictions and other errors."
46
  )
47
  X = X[:, self._selection]
48
+
49
  return self._lambda(*X.T) * np.ones(expected_shape)
50
 
51
  @property
pysr/sr.py CHANGED
@@ -2056,7 +2056,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
2056
  if self.selection_mask_ is not None:
2057
  # RangeIndex enforces column order allowing columns to
2058
  # be correctly filtered with self.selection_mask_
2059
- X = X.iloc[:, self.selection_mask_]
2060
  X.columns = self.feature_names_in_
2061
  # Without feature information, CallableEquation/lambda_format equations
2062
  # require that the column order of X matches that of the X used during
 
2056
  if self.selection_mask_ is not None:
2057
  # RangeIndex enforces column order allowing columns to
2058
  # be correctly filtered with self.selection_mask_
2059
+ X = X[X.columns[self.selection_mask_]]
2060
  X.columns = self.feature_names_in_
2061
  # Without feature information, CallableEquation/lambda_format equations
2062
  # require that the column order of X matches that of the X used during
pysr/test/test.py CHANGED
@@ -526,7 +526,7 @@ class TestFeatureSelection(unittest.TestCase):
526
  X = self.rstate.randn(20000, 5)
527
  y = X[:, 2] ** 2 + X[:, 3] ** 2
528
  selected = run_feature_selection(X, y, select_k_features=2)
529
- self.assertEqual(sorted(selected), [2, 3])
530
 
531
  def test_feature_selection_handler(self):
532
  X = self.rstate.randn(20000, 5)
@@ -538,8 +538,8 @@ class TestFeatureSelection(unittest.TestCase):
538
  variable_names=var_names,
539
  y=y,
540
  )
541
- self.assertTrue((2 in selection) and (3 in selection))
542
- selected_var_names = [var_names[i] for i in selection]
543
  self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
544
  np.testing.assert_array_equal(
545
  np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
 
526
  X = self.rstate.randn(20000, 5)
527
  y = X[:, 2] ** 2 + X[:, 3] ** 2
528
  selected = run_feature_selection(X, y, select_k_features=2)
529
+ np.testing.assert_array_equal(selected, [False, False, True, True, False])
530
 
531
  def test_feature_selection_handler(self):
532
  X = self.rstate.randn(20000, 5)
 
538
  variable_names=var_names,
539
  y=y,
540
  )
541
+ np.testing.assert_array_equal(selection, [False, False, True, True, False])
542
+ selected_var_names = [var_names[i] for i in range(5) if selection[i]]
543
  self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
544
  np.testing.assert_array_equal(
545
  np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)