Spaces:
Running
Running
tttc3
commited on
Commit
•
9490776
1
Parent(s):
c7187a6
fixed issues from deepsource
Browse files- pysr/julia_helpers.py +5 -3
- pysr/sr.py +62 -62
pysr/julia_helpers.py
CHANGED
@@ -6,9 +6,11 @@ from .version import __version__, __symbolic_regression_jl_version__
|
|
6 |
|
7 |
|
8 |
def install(julia_project=None, quiet=False): # pragma: no cover
|
9 |
-
"""
|
|
|
10 |
|
11 |
-
Also updates the local Julia registry.
|
|
|
12 |
import julia
|
13 |
|
14 |
julia.install(quiet=quiet)
|
@@ -40,7 +42,7 @@ def install(julia_project=None, quiet=False): # pragma: no cover
|
|
40 |
|
41 |
|
42 |
def import_error_string(julia_project=None):
|
43 |
-
s =
|
44 |
Required dependencies are not installed or built. Run the following code in the Python REPL:
|
45 |
|
46 |
>>> import pysr
|
|
|
6 |
|
7 |
|
8 |
def install(julia_project=None, quiet=False): # pragma: no cover
|
9 |
+
"""
|
10 |
+
Install PyCall.jl and all required dependencies for SymbolicRegression.jl.
|
11 |
|
12 |
+
Also updates the local Julia registry.
|
13 |
+
"""
|
14 |
import julia
|
15 |
|
16 |
julia.install(quiet=quiet)
|
|
|
42 |
|
43 |
|
44 |
def import_error_string(julia_project=None):
|
45 |
+
s = """
|
46 |
Required dependencies are not installed or built. Run the following code in the Python REPL:
|
47 |
|
48 |
>>> import pysr
|
pysr/sr.py
CHANGED
@@ -11,6 +11,7 @@ from pathlib import Path
|
|
11 |
from datetime import datetime
|
12 |
import warnings
|
13 |
from multiprocessing import cpu_count
|
|
|
14 |
from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin
|
15 |
from sklearn.utils.validation import _check_feature_names_in, check_is_fitted
|
16 |
|
@@ -189,10 +190,9 @@ class CallableEquation:
|
|
189 |
return self._lambda(
|
190 |
**{k: X[k].values for k in self._variable_names}
|
191 |
) * np.ones(expected_shape)
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
return self._lambda(*X.T) * np.ones(expected_shape)
|
196 |
|
197 |
|
198 |
class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
@@ -349,13 +349,15 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
349 |
Relative likelihood for mutation to leave the individual.
|
350 |
|
351 |
weight_mutate_constant : float, default=0.048
|
352 |
-
Relative likelihood for mutation to change the constant slightly
|
|
|
353 |
|
354 |
weight_mutate_operator : float, default=0.47
|
355 |
Relative likelihood for mutation to swap an operator.
|
356 |
|
357 |
weight_randomize : float, default=0.00023
|
358 |
-
Relative likelihood for mutation to completely delete and then
|
|
|
359 |
|
360 |
weight_simplify : float, default=0.0020
|
361 |
Relative likelihood for mutation to simplify constant parts by evaluation
|
@@ -666,13 +668,13 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
666 |
):
|
667 |
|
668 |
# Hyperparameters
|
669 |
-
|
670 |
self.model_selection = model_selection
|
671 |
self.binary_operators = binary_operators
|
672 |
self.unary_operators = unary_operators
|
673 |
self.niterations = niterations
|
674 |
self.populations = populations
|
675 |
-
|
676 |
self.population_size = population_size
|
677 |
self.max_evals = max_evals
|
678 |
self.maxsize = maxsize
|
@@ -681,7 +683,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
681 |
self.timeout_in_seconds = timeout_in_seconds
|
682 |
self.constraints = constraints
|
683 |
self.nested_constraints = nested_constraints
|
684 |
-
|
685 |
self.loss = loss
|
686 |
self.complexity_of_operators = complexity_of_operators
|
687 |
self.complexity_of_constants = complexity_of_constants
|
@@ -692,8 +694,8 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
692 |
self.alpha = alpha
|
693 |
self.annealing = annealing
|
694 |
self.early_stop_condition = early_stop_condition
|
695 |
-
|
696 |
-
|
697 |
self.ncyclesperiteration = ncyclesperiteration
|
698 |
self.fraction_replaced = fraction_replaced
|
699 |
self.fraction_replaced_hof = fraction_replaced_hof
|
@@ -707,18 +709,18 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
707 |
self.weight_simplify = weight_simplify
|
708 |
self.crossover_probability = crossover_probability
|
709 |
self.skip_mutation_failures = skip_mutation_failures
|
710 |
-
|
711 |
self.migration = migration
|
712 |
self.hof_migration = hof_migration
|
713 |
self.topn = topn
|
714 |
-
|
715 |
self.should_optimize_constants = should_optimize_constants
|
716 |
self.optimizer_algorithm = optimizer_algorithm
|
717 |
self.optimizer_nrestarts = optimizer_nrestarts
|
718 |
self.optimize_probability = optimize_probability
|
719 |
self.optimizer_iterations = optimizer_iterations
|
720 |
self.perturbation_factor = perturbation_factor
|
721 |
-
|
722 |
self.tournament_selection_n = tournament_selection_n
|
723 |
self.tournament_selection_p = tournament_selection_p
|
724 |
# Solver parameters
|
@@ -730,11 +732,11 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
730 |
self.fast_cycle = fast_cycle
|
731 |
self.precision = precision
|
732 |
# Additional runtime parameters
|
733 |
-
|
734 |
self.verbosity = verbosity
|
735 |
self.update_verbosity = update_verbosity
|
736 |
self.progress = progress
|
737 |
-
|
738 |
self.equation_file = equation_file
|
739 |
self.temp_equation_file = temp_equation_file
|
740 |
self.tempdir = tempdir
|
@@ -921,29 +923,27 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
921 |
# Warn if instance parameters are not sensible values:
|
922 |
if self.batch_size < 1:
|
923 |
warnings.warn(
|
924 |
-
|
925 |
-
|
926 |
)
|
927 |
self.batch_size = 1
|
928 |
|
929 |
if n_samples > 10000 and not self.batching:
|
930 |
warnings.warn(
|
931 |
-
""
|
932 |
-
|
933 |
-
You should
|
934 |
-
|
935 |
-
|
936 |
-
|
937 |
-
|
938 |
-
More datapoints will lower the search speed."
|
939 |
-
""",
|
940 |
)
|
941 |
|
942 |
# Ensure instance parameters are allowable values:
|
943 |
# ValueError - Incompatible values
|
944 |
-
if
|
945 |
raise ValueError(
|
946 |
-
|
947 |
)
|
948 |
|
949 |
if self.maxsize > 40:
|
@@ -951,7 +951,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
951 |
"Note: Using a large maxsize for the equation search will be exponentially slower and use significant memory. You should consider turning `use_frequency` to False, and perhaps use `warmup_maxsize_by`."
|
952 |
)
|
953 |
elif self.maxsize < 7:
|
954 |
-
raise ValueError(
|
955 |
|
956 |
if self.extra_jax_mappings is not None:
|
957 |
for value in self.extra_jax_mappings.values():
|
@@ -971,7 +971,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
971 |
else:
|
972 |
self.extra_torch_mappings = {}
|
973 |
|
974 |
-
# NotImplementedError -
|
975 |
if self.optimizer_algorithm not in self.VALID_OPTIMIZER_ALGORITHMS:
|
976 |
raise NotImplementedError(
|
977 |
f"PySR currently only supports the following optimizer algorithms: {self.VALID_OPTIMIZER_ALGORITHMS}"
|
@@ -1004,7 +1004,8 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1004 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
1005 |
Target values. Will be cast to X's dtype if necessary.
|
1006 |
|
1007 |
-
Xresampled : {ndarray | pandas.DataFrame} of shape
|
|
|
1008 |
Resampled training data used for denoising.
|
1009 |
|
1010 |
variable_names : list[str] of length n_features
|
@@ -1022,7 +1023,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1022 |
Validated list of variable names for each feature in `X`.
|
1023 |
|
1024 |
"""
|
1025 |
-
|
1026 |
if isinstance(X, pd.DataFrame):
|
1027 |
variable_names = None
|
1028 |
warnings.warn(
|
@@ -1037,14 +1037,13 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1037 |
"Spaces have been replaced with underscores. \n"
|
1038 |
"Please rename the columns to valid names."
|
1039 |
)
|
1040 |
-
elif variable_names:
|
1041 |
-
|
1042 |
-
|
1043 |
-
|
1044 |
-
|
1045 |
-
|
1046 |
-
|
1047 |
-
)
|
1048 |
# Only numpy values are needed from Xresampled, column metadata is
|
1049 |
# provided by X
|
1050 |
if isinstance(Xresampled, pd.DataFrame):
|
@@ -1080,7 +1079,8 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1080 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
1081 |
Target values. Will be cast to X's dtype if necessary.
|
1082 |
|
1083 |
-
Xresampled : {ndarray | pandas.DataFrame} of shape
|
|
|
1084 |
Resampled training data used for denoising.
|
1085 |
|
1086 |
variable_names : list[str] of length n_features
|
@@ -1118,17 +1118,17 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1118 |
variable_names = [variable_names[i] for i in self.selection_mask_]
|
1119 |
|
1120 |
# Re-perform data validation and feature name updating
|
1121 |
-
X,
|
1122 |
X=X, y=y, reset=True, multi_output=True
|
1123 |
)
|
1124 |
# Update feature names with selected variable names
|
1125 |
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
|
1126 |
-
print(f"Using features {
|
1127 |
|
1128 |
# Denoising transformation
|
1129 |
if self.denoise:
|
1130 |
if self.nout_ > 1:
|
1131 |
-
|
1132 |
[
|
1133 |
_denoise(X, y[:, i], Xresampled=Xresampled)[1]
|
1134 |
for i in range(self.nout_)
|
@@ -1168,8 +1168,8 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1168 |
ImportError
|
1169 |
Raised when the julia backend fails to import a package.
|
1170 |
"""
|
1171 |
-
|
1172 |
-
#
|
1173 |
global already_ran
|
1174 |
global Main
|
1175 |
|
@@ -1379,7 +1379,8 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1379 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
1380 |
Target values. Will be cast to X's dtype if necessary.
|
1381 |
|
1382 |
-
Xresampled : {ndarray | pandas.DataFrame} of shape
|
|
|
1383 |
Resampled training data used for denoising.
|
1384 |
|
1385 |
weights : {ndarray | pandas.DataFrame} of the same shape as y, default=None
|
@@ -1420,7 +1421,8 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1420 |
X, y, Xresampled, variable_names
|
1421 |
)
|
1422 |
|
1423 |
-
# Warn about large feature counts (still warn if feature count is large
|
|
|
1424 |
if self.n_features_in_ >= 10:
|
1425 |
warnings.warn(
|
1426 |
"Note: you are running with 10 features or more. "
|
@@ -1512,8 +1514,8 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1512 |
def predict(self, X, index=None):
|
1513 |
"""Predict y from input X using the equation chosen by `model_selection`.
|
1514 |
|
1515 |
-
You may see what equation is used by printing this object. X should
|
1516 |
-
columns as the training data.
|
1517 |
|
1518 |
Parameters
|
1519 |
----------
|
@@ -1550,10 +1552,10 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1550 |
SymPy representation of the best equation.
|
1551 |
"""
|
1552 |
self.refresh()
|
1553 |
-
|
1554 |
if self.nout_ > 1:
|
1555 |
-
return [eq["sympy_format"] for eq in
|
1556 |
-
return
|
1557 |
|
1558 |
def latex(self, index=None):
|
1559 |
"""Return latex representation of the equation(s) chosen by `model_selection`.
|
@@ -1596,13 +1598,12 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1596 |
Dictionary of callable jax function in "callable" key,
|
1597 |
and jax array of parameters as "parameters" key.
|
1598 |
"""
|
1599 |
-
|
1600 |
self.set_params(output_jax_format=True)
|
1601 |
self.refresh()
|
1602 |
-
|
1603 |
if self.nout_ > 1:
|
1604 |
-
return [eq["jax_format"] for eq in
|
1605 |
-
return
|
1606 |
|
1607 |
def pytorch(self, index=None):
|
1608 |
"""Return pytorch representation of the equation(s) chosen by `model_selection`.
|
@@ -1626,10 +1627,10 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1626 |
"""
|
1627 |
self.set_params(output_torch_format=True)
|
1628 |
self.refresh()
|
1629 |
-
|
1630 |
if self.nout_ > 1:
|
1631 |
-
return [eq["torch_format"] for eq in
|
1632 |
-
return
|
1633 |
|
1634 |
def get_hof(self):
|
1635 |
"""Get the equations from a hall of fame file. If no arguments
|
@@ -1796,7 +1797,6 @@ def run_feature_selection(X, y, select_k_features):
|
|
1796 |
"""Use a gradient boosting tree regressor as a proxy for finding
|
1797 |
the k most important features in X, returning indices for those
|
1798 |
features as output."""
|
1799 |
-
|
1800 |
from sklearn.ensemble import RandomForestRegressor
|
1801 |
from sklearn.feature_selection import SelectFromModel
|
1802 |
|
|
|
11 |
from datetime import datetime
|
12 |
import warnings
|
13 |
from multiprocessing import cpu_count
|
14 |
+
from sklearn.linear_model import LinearRegression
|
15 |
from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin
|
16 |
from sklearn.utils.validation import _check_feature_names_in, check_is_fitted
|
17 |
|
|
|
190 |
return self._lambda(
|
191 |
**{k: X[k].values for k in self._variable_names}
|
192 |
) * np.ones(expected_shape)
|
193 |
+
if self._selection is not None:
|
194 |
+
X = X[:, self._selection]
|
195 |
+
return self._lambda(*X.T) * np.ones(expected_shape)
|
|
|
196 |
|
197 |
|
198 |
class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
|
349 |
Relative likelihood for mutation to leave the individual.
|
350 |
|
351 |
weight_mutate_constant : float, default=0.048
|
352 |
+
Relative likelihood for mutation to change the constant slightly
|
353 |
+
in a random direction.
|
354 |
|
355 |
weight_mutate_operator : float, default=0.47
|
356 |
Relative likelihood for mutation to swap an operator.
|
357 |
|
358 |
weight_randomize : float, default=0.00023
|
359 |
+
Relative likelihood for mutation to completely delete and then
|
360 |
+
randomly generate the equation
|
361 |
|
362 |
weight_simplify : float, default=0.0020
|
363 |
Relative likelihood for mutation to simplify constant parts by evaluation
|
|
|
668 |
):
|
669 |
|
670 |
# Hyperparameters
|
671 |
+
# - Model search parameters
|
672 |
self.model_selection = model_selection
|
673 |
self.binary_operators = binary_operators
|
674 |
self.unary_operators = unary_operators
|
675 |
self.niterations = niterations
|
676 |
self.populations = populations
|
677 |
+
# - Model search Constraints
|
678 |
self.population_size = population_size
|
679 |
self.max_evals = max_evals
|
680 |
self.maxsize = maxsize
|
|
|
683 |
self.timeout_in_seconds = timeout_in_seconds
|
684 |
self.constraints = constraints
|
685 |
self.nested_constraints = nested_constraints
|
686 |
+
# - Loss parameters
|
687 |
self.loss = loss
|
688 |
self.complexity_of_operators = complexity_of_operators
|
689 |
self.complexity_of_constants = complexity_of_constants
|
|
|
694 |
self.alpha = alpha
|
695 |
self.annealing = annealing
|
696 |
self.early_stop_condition = early_stop_condition
|
697 |
+
# - Evolutionary search parameters
|
698 |
+
# -- Mutation parameters
|
699 |
self.ncyclesperiteration = ncyclesperiteration
|
700 |
self.fraction_replaced = fraction_replaced
|
701 |
self.fraction_replaced_hof = fraction_replaced_hof
|
|
|
709 |
self.weight_simplify = weight_simplify
|
710 |
self.crossover_probability = crossover_probability
|
711 |
self.skip_mutation_failures = skip_mutation_failures
|
712 |
+
# -- Migration parameters
|
713 |
self.migration = migration
|
714 |
self.hof_migration = hof_migration
|
715 |
self.topn = topn
|
716 |
+
# -- Constants parameters
|
717 |
self.should_optimize_constants = should_optimize_constants
|
718 |
self.optimizer_algorithm = optimizer_algorithm
|
719 |
self.optimizer_nrestarts = optimizer_nrestarts
|
720 |
self.optimize_probability = optimize_probability
|
721 |
self.optimizer_iterations = optimizer_iterations
|
722 |
self.perturbation_factor = perturbation_factor
|
723 |
+
# -- Selection parameters
|
724 |
self.tournament_selection_n = tournament_selection_n
|
725 |
self.tournament_selection_p = tournament_selection_p
|
726 |
# Solver parameters
|
|
|
732 |
self.fast_cycle = fast_cycle
|
733 |
self.precision = precision
|
734 |
# Additional runtime parameters
|
735 |
+
# - Runtime user interface
|
736 |
self.verbosity = verbosity
|
737 |
self.update_verbosity = update_verbosity
|
738 |
self.progress = progress
|
739 |
+
# - Project management
|
740 |
self.equation_file = equation_file
|
741 |
self.temp_equation_file = temp_equation_file
|
742 |
self.tempdir = tempdir
|
|
|
923 |
# Warn if instance parameters are not sensible values:
|
924 |
if self.batch_size < 1:
|
925 |
warnings.warn(
|
926 |
+
"Given :param`batch_size` must be greater than or equal to one. "
|
927 |
+
":param`batch_size` has been increased to equal one."
|
928 |
)
|
929 |
self.batch_size = 1
|
930 |
|
931 |
if n_samples > 10000 and not self.batching:
|
932 |
warnings.warn(
|
933 |
+
"Note: you are running with more than 10,000 datapoints. "
|
934 |
+
"You should consider turning on batching (https://astroautomata.com/PySR/#/options?id=batching). "
|
935 |
+
"You should also reconsider if you need that many datapoints. "
|
936 |
+
"Unless you have a large amount of noise (in which case you "
|
937 |
+
"should smooth your dataset first), generally < 10,000 datapoints "
|
938 |
+
"is enough to find a functional form with symbolic regression. "
|
939 |
+
"More datapoints will lower the search speed."
|
|
|
|
|
940 |
)
|
941 |
|
942 |
# Ensure instance parameters are allowable values:
|
943 |
# ValueError - Incompatible values
|
944 |
+
if self.tournament_selection_n > self.population_size:
|
945 |
raise ValueError(
|
946 |
+
"tournament_selection_n parameter must be smaller than population_size."
|
947 |
)
|
948 |
|
949 |
if self.maxsize > 40:
|
|
|
951 |
"Note: Using a large maxsize for the equation search will be exponentially slower and use significant memory. You should consider turning `use_frequency` to False, and perhaps use `warmup_maxsize_by`."
|
952 |
)
|
953 |
elif self.maxsize < 7:
|
954 |
+
raise ValueError("PySR requires a maxsize of at least 7")
|
955 |
|
956 |
if self.extra_jax_mappings is not None:
|
957 |
for value in self.extra_jax_mappings.values():
|
|
|
971 |
else:
|
972 |
self.extra_torch_mappings = {}
|
973 |
|
974 |
+
# NotImplementedError - Values that could be supported at a later time
|
975 |
if self.optimizer_algorithm not in self.VALID_OPTIMIZER_ALGORITHMS:
|
976 |
raise NotImplementedError(
|
977 |
f"PySR currently only supports the following optimizer algorithms: {self.VALID_OPTIMIZER_ALGORITHMS}"
|
|
|
1004 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
1005 |
Target values. Will be cast to X's dtype if necessary.
|
1006 |
|
1007 |
+
Xresampled : {ndarray | pandas.DataFrame} of shape
|
1008 |
+
(n_resampled, n_features), default=None
|
1009 |
Resampled training data used for denoising.
|
1010 |
|
1011 |
variable_names : list[str] of length n_features
|
|
|
1023 |
Validated list of variable names for each feature in `X`.
|
1024 |
|
1025 |
"""
|
|
|
1026 |
if isinstance(X, pd.DataFrame):
|
1027 |
variable_names = None
|
1028 |
warnings.warn(
|
|
|
1037 |
"Spaces have been replaced with underscores. \n"
|
1038 |
"Please rename the columns to valid names."
|
1039 |
)
|
1040 |
+
elif variable_names and [" " in name for name in variable_names].any():
|
1041 |
+
variable_names = [name.replace(" ", "_") for name in variable_names]
|
1042 |
+
warnings.warn(
|
1043 |
+
"Spaces in `variable_names` are not supported. "
|
1044 |
+
"Spaces have been replaced with underscores. \n"
|
1045 |
+
"Please use valid names instead."
|
1046 |
+
)
|
|
|
1047 |
# Only numpy values are needed from Xresampled, column metadata is
|
1048 |
# provided by X
|
1049 |
if isinstance(Xresampled, pd.DataFrame):
|
|
|
1079 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
1080 |
Target values. Will be cast to X's dtype if necessary.
|
1081 |
|
1082 |
+
Xresampled : {ndarray | pandas.DataFrame} of shape
|
1083 |
+
(n_resampled, n_features), default=None
|
1084 |
Resampled training data used for denoising.
|
1085 |
|
1086 |
variable_names : list[str] of length n_features
|
|
|
1118 |
variable_names = [variable_names[i] for i in self.selection_mask_]
|
1119 |
|
1120 |
# Re-perform data validation and feature name updating
|
1121 |
+
X, y = self._validate_data(
|
1122 |
X=X, y=y, reset=True, multi_output=True
|
1123 |
)
|
1124 |
# Update feature names with selected variable names
|
1125 |
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
|
1126 |
+
print(f"Using features {self.feature_names_in_}")
|
1127 |
|
1128 |
# Denoising transformation
|
1129 |
if self.denoise:
|
1130 |
if self.nout_ > 1:
|
1131 |
+
y = np.stack(
|
1132 |
[
|
1133 |
_denoise(X, y[:, i], Xresampled=Xresampled)[1]
|
1134 |
for i in range(self.nout_)
|
|
|
1168 |
ImportError
|
1169 |
Raised when the julia backend fails to import a package.
|
1170 |
"""
|
1171 |
+
# Need to be global as we don't want to recreate/reinstate julia for
|
1172 |
+
# every new instance of PySRRegressor
|
1173 |
global already_ran
|
1174 |
global Main
|
1175 |
|
|
|
1379 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
1380 |
Target values. Will be cast to X's dtype if necessary.
|
1381 |
|
1382 |
+
Xresampled : {ndarray | pandas.DataFrame} of shape
|
1383 |
+
(n_resampled, n_features), default=None
|
1384 |
Resampled training data used for denoising.
|
1385 |
|
1386 |
weights : {ndarray | pandas.DataFrame} of the same shape as y, default=None
|
|
|
1421 |
X, y, Xresampled, variable_names
|
1422 |
)
|
1423 |
|
1424 |
+
# Warn about large feature counts (still warn if feature count is large
|
1425 |
+
# after running feature selection)
|
1426 |
if self.n_features_in_ >= 10:
|
1427 |
warnings.warn(
|
1428 |
"Note: you are running with 10 features or more. "
|
|
|
1514 |
def predict(self, X, index=None):
|
1515 |
"""Predict y from input X using the equation chosen by `model_selection`.
|
1516 |
|
1517 |
+
You may see what equation is used by printing this object. X should
|
1518 |
+
have the same columns as the training data.
|
1519 |
|
1520 |
Parameters
|
1521 |
----------
|
|
|
1552 |
SymPy representation of the best equation.
|
1553 |
"""
|
1554 |
self.refresh()
|
1555 |
+
best_equation = self.get_best(index=index)
|
1556 |
if self.nout_ > 1:
|
1557 |
+
return [eq["sympy_format"] for eq in best_equation]
|
1558 |
+
return best_equation["sympy_format"]
|
1559 |
|
1560 |
def latex(self, index=None):
|
1561 |
"""Return latex representation of the equation(s) chosen by `model_selection`.
|
|
|
1598 |
Dictionary of callable jax function in "callable" key,
|
1599 |
and jax array of parameters as "parameters" key.
|
1600 |
"""
|
|
|
1601 |
self.set_params(output_jax_format=True)
|
1602 |
self.refresh()
|
1603 |
+
best_equation = self.get_best(index=index)
|
1604 |
if self.nout_ > 1:
|
1605 |
+
return [eq["jax_format"] for eq in best_equation]
|
1606 |
+
return best_equation["jax_format"]
|
1607 |
|
1608 |
def pytorch(self, index=None):
|
1609 |
"""Return pytorch representation of the equation(s) chosen by `model_selection`.
|
|
|
1627 |
"""
|
1628 |
self.set_params(output_torch_format=True)
|
1629 |
self.refresh()
|
1630 |
+
best_equation = self.get_best(index=index)
|
1631 |
if self.nout_ > 1:
|
1632 |
+
return [eq["torch_format"] for eq in best_equation]
|
1633 |
+
return best_equation["torch_format"]
|
1634 |
|
1635 |
def get_hof(self):
|
1636 |
"""Get the equations from a hall of fame file. If no arguments
|
|
|
1797 |
"""Use a gradient boosting tree regressor as a proxy for finding
|
1798 |
the k most important features in X, returning indices for those
|
1799 |
features as output."""
|
|
|
1800 |
from sklearn.ensemble import RandomForestRegressor
|
1801 |
from sklearn.feature_selection import SelectFromModel
|
1802 |
|