Spaces:
Sleeping
Sleeping
tttc3
committed on
Commit
•
83d8e67
1
Parent(s):
2d0032e
Fixed issues outlined in pull request review
Browse files- pysr/export_numpy.py +9 -1
- pysr/sr.py +53 -30
pysr/export_numpy.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
import numpy as np
|
3 |
import pandas as pd
|
4 |
from sympy import lambdify
|
|
|
5 |
|
6 |
|
7 |
class CallableEquation:
|
@@ -25,5 +26,12 @@ class CallableEquation:
|
|
25 |
**{k: X[k].values for k in self._variable_names}
|
26 |
) * np.ones(expected_shape)
|
27 |
if self._selection is not None:
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
return self._lambda(*X.T) * np.ones(expected_shape)
|
|
|
2 |
import numpy as np
|
3 |
import pandas as pd
|
4 |
from sympy import lambdify
|
5 |
+
import warnings
|
6 |
|
7 |
|
8 |
class CallableEquation:
|
|
|
26 |
**{k: X[k].values for k in self._variable_names}
|
27 |
) * np.ones(expected_shape)
|
28 |
if self._selection is not None:
|
29 |
+
if X.shape[1] != len(self._selection):
|
30 |
+
warnings.warn(
|
31 |
+
"`X` should be of shape (n_samples, len(self._selection)). "
|
32 |
+
"Automatically filtering `X` to selection. "
|
33 |
+
"Note: Filtered `X` column order may not match column order in fit "
|
34 |
+
"this may lead to incorrect predictions and other errors."
|
35 |
+
)
|
36 |
+
X = X[:, self._selection]
|
37 |
return self._lambda(*X.T) * np.ones(expected_shape)
|
pysr/sr.py
CHANGED
@@ -190,7 +190,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
190 |
binary_operators : list[str], default=["+", "-", "*", "/"]
|
191 |
List of strings giving the binary operators in Julia's Base.
|
192 |
|
193 |
-
unary_operators : list[str], default=
|
194 |
Same as :param`binary_operators` but for operators taking a
|
195 |
single scalar.
|
196 |
|
@@ -226,7 +226,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
226 |
timeout_in_seconds : float, default=None
|
227 |
Make the search return early once this many seconds have passed.
|
228 |
|
229 |
-
constraints : dict[str, int | tuple[int,int]], default=
|
230 |
Dictionary of int (unary) or 2-tuples (binary), this enforces
|
231 |
maxsize constraints on the individual arguments of operators.
|
232 |
E.g., `'pow': (-1, 1)` says that power laws can have any
|
@@ -462,7 +462,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
462 |
Whether to create a 'torch_format' column in the output,
|
463 |
containing a torch module with trainable parameters.
|
464 |
|
465 |
-
extra_sympy_mappings : dict[str, Callable], default=
|
466 |
Provides mappings between custom :param`binary_operators` or
|
467 |
:param`unary_operators` defined in julia strings, to those same
|
468 |
operators defined in sympy.
|
@@ -470,13 +470,13 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
470 |
model to be exported to sympy, :param`extra_sympy_mappings`
|
471 |
would be `{"inv": lambda x: 1/x}`.
|
472 |
|
473 |
-
extra_jax_mappings : dict[Callable, str], default=
|
474 |
Similar to :param`extra_sympy_mappings` but for model export
|
475 |
to jax. The dictionary maps sympy functions to jax functions.
|
476 |
For example: `extra_jax_mappings={sympy.sin: "jnp.sin"}` maps
|
477 |
the `sympy.sin` function to the equivalent jax expression `jnp.sin`.
|
478 |
|
479 |
-
extra_torch_mappings : dict[Callable, Callable], default=
|
480 |
The same as :param`extra_jax_mappings` but for model export
|
481 |
to pytorch. Note that the dictionary keys should be callable
|
482 |
pytorch expressions.
|
@@ -571,13 +571,8 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
571 |
self,
|
572 |
model_selection="best",
|
573 |
*,
|
574 |
-
binary_operators=
|
575 |
-
|
576 |
-
"-",
|
577 |
-
"*",
|
578 |
-
"/",
|
579 |
-
],
|
580 |
-
unary_operators=[],
|
581 |
niterations=40,
|
582 |
populations=15,
|
583 |
population_size=33,
|
@@ -586,7 +581,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
586 |
maxdepth=None,
|
587 |
warmup_maxsize_by=0.0,
|
588 |
timeout_in_seconds=None,
|
589 |
-
constraints=
|
590 |
nested_constraints=None,
|
591 |
loss="L2DistLoss()",
|
592 |
complexity_of_operators=None,
|
@@ -640,9 +635,9 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
640 |
update=True,
|
641 |
output_jax_format=False,
|
642 |
output_torch_format=False,
|
643 |
-
extra_sympy_mappings=
|
644 |
-
extra_torch_mappings=
|
645 |
-
extra_jax_mappings=
|
646 |
denoise=False,
|
647 |
select_k_features=None,
|
648 |
**kwargs,
|
@@ -888,6 +883,14 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
888 |
|
889 |
"""
|
890 |
# Handle None values for instance parameters:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
891 |
if self.multithreading is None:
|
892 |
# Default is multithreading=True, unless explicitly set,
|
893 |
# or procs is set to 0 (serial mode).
|
@@ -1018,11 +1021,12 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1018 |
|
1019 |
"""
|
1020 |
if isinstance(X, pd.DataFrame):
|
1021 |
-
variable_names
|
1022 |
-
|
1023 |
-
|
1024 |
-
|
1025 |
-
|
|
|
1026 |
|
1027 |
if X.columns.is_object() and X.columns.str.contains(" ").any():
|
1028 |
X.columns = X.columns.str.replace(" ", "_")
|
@@ -1395,7 +1399,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1395 |
self : object
|
1396 |
Fitted Estimator.
|
1397 |
"""
|
1398 |
-
|
1399 |
# Init attributes that are not specified in BaseEstimator
|
1400 |
self.equations_ = None
|
1401 |
self.nout_ = 1
|
@@ -1482,14 +1485,35 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1482 |
ValueError
|
1483 |
Raises if the `best_equation` cannot be evaluated.
|
1484 |
"""
|
1485 |
-
check_is_fitted(self)
|
1486 |
-
|
1487 |
-
|
1488 |
-
|
1489 |
-
|
1490 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1491 |
|
1492 |
X = self._validate_data(X, reset=False)
|
|
|
1493 |
try:
|
1494 |
if self.nout_ > 1:
|
1495 |
return np.stack(
|
@@ -1685,8 +1709,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1685 |
}
|
1686 |
|
1687 |
sympy_symbols = [
|
1688 |
-
sympy.Symbol(self.feature_names_in_
|
1689 |
-
for i in range(self.n_features_in_)
|
1690 |
]
|
1691 |
|
1692 |
for _, eqn_row in output.iterrows():
|
|
|
190 |
binary_operators : list[str], default=["+", "-", "*", "/"]
|
191 |
List of strings giving the binary operators in Julia's Base.
|
192 |
|
193 |
+
unary_operators : list[str], default=None
|
194 |
Same as :param`binary_operators` but for operators taking a
|
195 |
single scalar.
|
196 |
|
|
|
226 |
timeout_in_seconds : float, default=None
|
227 |
Make the search return early once this many seconds have passed.
|
228 |
|
229 |
+
constraints : dict[str, int | tuple[int,int]], default=None
|
230 |
Dictionary of int (unary) or 2-tuples (binary), this enforces
|
231 |
maxsize constraints on the individual arguments of operators.
|
232 |
E.g., `'pow': (-1, 1)` says that power laws can have any
|
|
|
462 |
Whether to create a 'torch_format' column in the output,
|
463 |
containing a torch module with trainable parameters.
|
464 |
|
465 |
+
extra_sympy_mappings : dict[str, Callable], default=None
|
466 |
Provides mappings between custom :param`binary_operators` or
|
467 |
:param`unary_operators` defined in julia strings, to those same
|
468 |
operators defined in sympy.
|
|
|
470 |
model to be exported to sympy, :param`extra_sympy_mappings`
|
471 |
would be `{"inv": lambda x: 1/x}`.
|
472 |
|
473 |
+
extra_jax_mappings : dict[Callable, str], default=None
|
474 |
Similar to :param`extra_sympy_mappings` but for model export
|
475 |
to jax. The dictionary maps sympy functions to jax functions.
|
476 |
For example: `extra_jax_mappings={sympy.sin: "jnp.sin"}` maps
|
477 |
the `sympy.sin` function to the equivalent jax expression `jnp.sin`.
|
478 |
|
479 |
+
extra_torch_mappings : dict[Callable, Callable], default=None
|
480 |
The same as :param`extra_jax_mappings` but for model export
|
481 |
to pytorch. Note that the dictionary keys should be callable
|
482 |
pytorch expressions.
|
|
|
571 |
self,
|
572 |
model_selection="best",
|
573 |
*,
|
574 |
+
binary_operators=None,
|
575 |
+
unary_operators=None,
|
|
|
|
|
|
|
|
|
|
|
576 |
niterations=40,
|
577 |
populations=15,
|
578 |
population_size=33,
|
|
|
581 |
maxdepth=None,
|
582 |
warmup_maxsize_by=0.0,
|
583 |
timeout_in_seconds=None,
|
584 |
+
constraints=None,
|
585 |
nested_constraints=None,
|
586 |
loss="L2DistLoss()",
|
587 |
complexity_of_operators=None,
|
|
|
635 |
update=True,
|
636 |
output_jax_format=False,
|
637 |
output_torch_format=False,
|
638 |
+
extra_sympy_mappings=None,
|
639 |
+
extra_torch_mappings=None,
|
640 |
+
extra_jax_mappings=None,
|
641 |
denoise=False,
|
642 |
select_k_features=None,
|
643 |
**kwargs,
|
|
|
883 |
|
884 |
"""
|
885 |
# Handle None values for instance parameters:
|
886 |
+
if self.binary_operators is None:
|
887 |
+
self.binary_operators = "+ * - /".split(" ")
|
888 |
+
if self.unary_operators is None:
|
889 |
+
self.unary_operators = []
|
890 |
+
if self.extra_sympy_mappings is None:
|
891 |
+
self.extra_sympy_mappings = {}
|
892 |
+
if self.constraints is None:
|
893 |
+
self.constraints = {}
|
894 |
if self.multithreading is None:
|
895 |
# Default is multithreading=True, unless explicitly set,
|
896 |
# or procs is set to 0 (serial mode).
|
|
|
1021 |
|
1022 |
"""
|
1023 |
if isinstance(X, pd.DataFrame):
|
1024 |
+
if variable_names:
|
1025 |
+
variable_names = None
|
1026 |
+
warnings.warn(
|
1027 |
+
":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
|
1028 |
+
"Will use DataFrame column names instead."
|
1029 |
+
)
|
1030 |
|
1031 |
if X.columns.is_object() and X.columns.str.contains(" ").any():
|
1032 |
X.columns = X.columns.str.replace(" ", "_")
|
|
|
1399 |
self : object
|
1400 |
Fitted Estimator.
|
1401 |
"""
|
|
|
1402 |
# Init attributes that are not specified in BaseEstimator
|
1403 |
self.equations_ = None
|
1404 |
self.nout_ = 1
|
|
|
1485 |
ValueError
|
1486 |
Raises if the `best_equation` cannot be evaluated.
|
1487 |
"""
|
1488 |
+
check_is_fitted(self, attributes=["equations_", "feature_names_in_"])
|
1489 |
+
|
1490 |
+
# When X is a numpy array or a pandas dataframe with a RangeIndex,
|
1491 |
+
# the self.feature_names_in_ generated during fit, for the same X,
|
1492 |
+
# will cause a warning to be thrown during _validate_data.
|
1493 |
+
# To avoid this, convert X to a dataframe, apply the selection mask,
|
1494 |
+
# and then set the column/feature_names of X to be equal to those
|
1495 |
+
# generated during fit.
|
1496 |
+
if isinstance(X, np.ndarray):
|
1497 |
+
X = pd.DataFrame(X)
|
1498 |
+
|
1499 |
+
if isinstance(X.columns, pd.RangeIndex):
|
1500 |
+
if self.selection_mask_:
|
1501 |
+
# RangeIndex enforces column order allowing columns to
|
1502 |
+
# be correctly filtered with self.selection_mask_
|
1503 |
+
X = X.iloc[:, self.selection_mask_]
|
1504 |
+
X.columns = self.feature_names_in_
|
1505 |
+
|
1506 |
+
# Without feature information, CallableEquation/lambda_format equations
|
1507 |
+
# require that the column order of X matches that of the X used during
|
1508 |
+
# the fitting process. _validate_data removes this feature information
|
1509 |
+
# when it converts the dataframe to an np array. Thus, to ensure feature
|
1510 |
+
# order is preserved after conversion, the dataframe columns must be
|
1511 |
+
# reordered/reindexed to match those of the transformed (denoised and
|
1512 |
+
# feature selected) X in fit.
|
1513 |
+
X = X.reindex(columns=self.feature_names_in_)
|
1514 |
|
1515 |
X = self._validate_data(X, reset=False)
|
1516 |
+
|
1517 |
try:
|
1518 |
if self.nout_ > 1:
|
1519 |
return np.stack(
|
|
|
1709 |
}
|
1710 |
|
1711 |
sympy_symbols = [
|
1712 |
+
sympy.Symbol(variable) for variable in self.feature_names_in_
|
|
|
1713 |
]
|
1714 |
|
1715 |
for _, eqn_row in output.iterrows():
|