Spaces:
Running
Running
MilesCranmer
committed on
fix: boolean selection masks in pandas eval
Browse files
- pysr/export_numpy.py +10 -2
- pysr/sr.py +1 -1
- pysr/test/test.py +3 -3
pysr/export_numpy.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1 |
"""Code for exporting discovered expressions to numpy"""
|
2 |
|
3 |
import warnings
|
|
|
4 |
|
5 |
import numpy as np
|
6 |
import pandas as pd
|
7 |
-
from
|
|
|
8 |
|
9 |
|
10 |
def sympy2numpy(eqn, sympy_symbols, *, selection=None):
|
@@ -14,6 +16,10 @@ def sympy2numpy(eqn, sympy_symbols, *, selection=None):
|
|
14 |
class CallableEquation:
|
15 |
"""Simple wrapper for numpy lambda functions built with sympy"""
|
16 |
|
|
|
|
|
|
|
|
|
17 |
def __init__(self, eqn, sympy_symbols, selection=None):
|
18 |
self._sympy = eqn
|
19 |
self._sympy_symbols = sympy_symbols
|
@@ -29,8 +35,9 @@ class CallableEquation:
|
|
29 |
return self._lambda(
|
30 |
**{k: X[k].values for k in map(str, self._sympy_symbols)}
|
31 |
) * np.ones(expected_shape)
|
|
|
32 |
if self._selection is not None:
|
33 |
-
if X.shape[1] !=
|
34 |
warnings.warn(
|
35 |
"`X` should be of shape (n_samples, len(self._selection)). "
|
36 |
"Automatically filtering `X` to selection. "
|
@@ -38,6 +45,7 @@ class CallableEquation:
|
|
38 |
"this may lead to incorrect predictions and other errors."
|
39 |
)
|
40 |
X = X[:, self._selection]
|
|
|
41 |
return self._lambda(*X.T) * np.ones(expected_shape)
|
42 |
|
43 |
@property
|
|
|
1 |
"""Code for exporting discovered expressions to numpy"""
|
2 |
|
3 |
import warnings
|
4 |
+
from typing import List, Union
|
5 |
|
6 |
import numpy as np
|
7 |
import pandas as pd
|
8 |
+
from numpy.typing import NDArray
|
9 |
+
from sympy import Expr, Symbol, lambdify
|
10 |
|
11 |
|
12 |
def sympy2numpy(eqn, sympy_symbols, *, selection=None):
|
|
|
16 |
class CallableEquation:
|
17 |
"""Simple wrapper for numpy lambda functions built with sympy"""
|
18 |
|
19 |
+
_sympy: Expr
|
20 |
+
_sympy_symbols: List[Symbol]
|
21 |
+
_selection: Union[NDArray[np.bool_], None]
|
22 |
+
|
23 |
def __init__(self, eqn, sympy_symbols, selection=None):
|
24 |
self._sympy = eqn
|
25 |
self._sympy_symbols = sympy_symbols
|
|
|
35 |
return self._lambda(
|
36 |
**{k: X[k].values for k in map(str, self._sympy_symbols)}
|
37 |
) * np.ones(expected_shape)
|
38 |
+
|
39 |
if self._selection is not None:
|
40 |
+
if X.shape[1] != self._selection.sum():
|
41 |
warnings.warn(
|
42 |
"`X` should be of shape (n_samples, len(self._selection)). "
|
43 |
"Automatically filtering `X` to selection. "
|
|
|
45 |
"this may lead to incorrect predictions and other errors."
|
46 |
)
|
47 |
X = X[:, self._selection]
|
48 |
+
|
49 |
return self._lambda(*X.T) * np.ones(expected_shape)
|
50 |
|
51 |
@property
|
pysr/sr.py
CHANGED
@@ -2056,7 +2056,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
2056 |
if self.selection_mask_ is not None:
|
2057 |
# RangeIndex enforces column order allowing columns to
|
2058 |
# be correctly filtered with self.selection_mask_
|
2059 |
-
X = X.
|
2060 |
X.columns = self.feature_names_in_
|
2061 |
# Without feature information, CallableEquation/lambda_format equations
|
2062 |
# require that the column order of X matches that of the X used during
|
|
|
2056 |
if self.selection_mask_ is not None:
|
2057 |
# RangeIndex enforces column order allowing columns to
|
2058 |
# be correctly filtered with self.selection_mask_
|
2059 |
+
X = X[X.columns[self.selection_mask_]]
|
2060 |
X.columns = self.feature_names_in_
|
2061 |
# Without feature information, CallableEquation/lambda_format equations
|
2062 |
# require that the column order of X matches that of the X used during
|
pysr/test/test.py
CHANGED
@@ -526,7 +526,7 @@ class TestFeatureSelection(unittest.TestCase):
|
|
526 |
X = self.rstate.randn(20000, 5)
|
527 |
y = X[:, 2] ** 2 + X[:, 3] ** 2
|
528 |
selected = run_feature_selection(X, y, select_k_features=2)
|
529 |
-
|
530 |
|
531 |
def test_feature_selection_handler(self):
|
532 |
X = self.rstate.randn(20000, 5)
|
@@ -538,8 +538,8 @@ class TestFeatureSelection(unittest.TestCase):
|
|
538 |
variable_names=var_names,
|
539 |
y=y,
|
540 |
)
|
541 |
-
|
542 |
-
selected_var_names = [var_names[i] for i in selection]
|
543 |
self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
|
544 |
np.testing.assert_array_equal(
|
545 |
np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
|
|
|
526 |
X = self.rstate.randn(20000, 5)
|
527 |
y = X[:, 2] ** 2 + X[:, 3] ** 2
|
528 |
selected = run_feature_selection(X, y, select_k_features=2)
|
529 |
+
np.testing.assert_array_equal(selected, [False, False, True, True, False])
|
530 |
|
531 |
def test_feature_selection_handler(self):
|
532 |
X = self.rstate.randn(20000, 5)
|
|
|
538 |
variable_names=var_names,
|
539 |
y=y,
|
540 |
)
|
541 |
+
np.testing.assert_array_equal(selection, [False, False, True, True, False])
|
542 |
+
selected_var_names = [var_names[i] for i in range(5) if selection[i]]
|
543 |
self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
|
544 |
np.testing.assert_array_equal(
|
545 |
np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
|