Spaces:
Running
Running
MilesCranmer
committed on
Commit
•
32d0b3a
1
Parent(s):
623e6f0
Documentation cleanup
Browse files- pysr/sr.py +15 -9
pysr/sr.py
CHANGED
@@ -987,7 +987,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
987 |
):
|
988 |
raise ValueError(
|
989 |
"To ensure deterministic searches, you must set `random_state` to a seed, "
|
990 |
-
"`
|
991 |
)
|
992 |
|
993 |
if self.random_state != None and (not self.deterministic or self.procs != 0):
|
@@ -1006,7 +1006,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1006 |
# 'Mutable' parameter validation
|
1007 |
buffer_available = "buffer" in sys.stdout.__dir__()
|
1008 |
# Params and their default values, if None is given:
|
1009 |
-
|
1010 |
"binary_operators": "+ * - /".split(" "),
|
1011 |
"unary_operators": [],
|
1012 |
"maxdepth": self.maxsize,
|
@@ -1017,7 +1017,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1017 |
"progress": buffer_available,
|
1018 |
}
|
1019 |
packed_modified_params = {}
|
1020 |
-
for parameter, default_value in
|
1021 |
parameter_value = getattr(self, parameter)
|
1022 |
if parameter_value is None:
|
1023 |
parameter_value = default_value
|
@@ -1093,7 +1093,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1093 |
variable_names = None
|
1094 |
warnings.warn(
|
1095 |
":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
|
1096 |
-
"
|
1097 |
)
|
1098 |
|
1099 |
if X.columns.is_object() and X.columns.str.contains(" ").any():
|
@@ -1480,21 +1480,26 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1480 |
|
1481 |
Xresampled : {ndarray | pandas.DataFrame} of shape
|
1482 |
(n_resampled, n_features), default=None
|
1483 |
-
Resampled training data
|
|
|
1484 |
|
1485 |
weights : {ndarray | pandas.DataFrame} of the same shape as y, default=None
|
1486 |
Each element is how to weight the mean-square-error loss
|
1487 |
-
for that particular element of y
|
|
|
|
|
1488 |
|
1489 |
variable_names : list[str], default=None
|
1490 |
A list of names for the variables, rather than "x0", "x1", etc.
|
1491 |
-
If :param`X` is a pandas dataframe, the column names will be used
|
1492 |
-
|
|
|
|
|
1493 |
|
1494 |
Returns
|
1495 |
-------
|
1496 |
self : object
|
1497 |
-
Fitted
|
1498 |
"""
|
1499 |
# Init attributes that are not specified in BaseEstimator
|
1500 |
if self.warm_start and hasattr(self, "raw_julia_state_"):
|
@@ -1892,6 +1897,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1892 |
cur_score = 0.0
|
1893 |
else:
|
1894 |
if curMSE > 0.0:
|
|
|
1895 |
cur_score = -np.log(curMSE / lastMSE) / (
|
1896 |
curComplexity - lastComplexity
|
1897 |
)
|
|
|
987 |
):
|
988 |
raise ValueError(
|
989 |
"To ensure deterministic searches, you must set `random_state` to a seed, "
|
990 |
+
"`procs` to `0`, and `multithreading` to `False` or `None`."
|
991 |
)
|
992 |
|
993 |
if self.random_state != None and (not self.deterministic or self.procs != 0):
|
|
|
1006 |
# 'Mutable' parameter validation
|
1007 |
buffer_available = "buffer" in sys.stdout.__dir__()
|
1008 |
# Params and their default values, if None is given:
|
1009 |
+
default_param_mapping = {
|
1010 |
"binary_operators": "+ * - /".split(" "),
|
1011 |
"unary_operators": [],
|
1012 |
"maxdepth": self.maxsize,
|
|
|
1017 |
"progress": buffer_available,
|
1018 |
}
|
1019 |
packed_modified_params = {}
|
1020 |
+
for parameter, default_value in default_param_mapping.items():
|
1021 |
parameter_value = getattr(self, parameter)
|
1022 |
if parameter_value is None:
|
1023 |
parameter_value = default_value
|
|
|
1093 |
variable_names = None
|
1094 |
warnings.warn(
|
1095 |
":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
|
1096 |
+
"Using DataFrame column names instead."
|
1097 |
)
|
1098 |
|
1099 |
if X.columns.is_object() and X.columns.str.contains(" ").any():
|
|
|
1480 |
|
1481 |
Xresampled : {ndarray | pandas.DataFrame} of shape
|
1482 |
(n_resampled, n_features), default=None
|
1483 |
+
Resampled training data on which to generate denoised data. This
|
1484 |
+
will be used as the training data, rather than `X`.
|
1485 |
|
1486 |
weights : {ndarray | pandas.DataFrame} of the same shape as y, default=None
|
1487 |
Each element is how to weight the mean-square-error loss
|
1488 |
+
for that particular element of `y`. Alternatively,
|
1489 |
+
if a custom `loss` was set, it can be used
|
1490 |
+
in arbitrary ways.
|
1491 |
|
1492 |
variable_names : list[str], default=None
|
1493 |
A list of names for the variables, rather than "x0", "x1", etc.
|
1494 |
+
If :param`X` is a pandas dataframe, the column names will be used
|
1495 |
+
instead of `variable_names`. Cannot contain spaces or special
|
1496 |
+
characters. Avoid variable names which are also
|
1497 |
+
function names in `sympy`, such as "N".
|
1498 |
|
1499 |
Returns
|
1500 |
-------
|
1501 |
self : object
|
1502 |
+
Fitted estimator.
|
1503 |
"""
|
1504 |
# Init attributes that are not specified in BaseEstimator
|
1505 |
if self.warm_start and hasattr(self, "raw_julia_state_"):
|
|
|
1897 |
cur_score = 0.0
|
1898 |
else:
|
1899 |
if curMSE > 0.0:
|
1900 |
+
# TODO Move this to more obvious function/file.
|
1901 |
cur_score = -np.log(curMSE / lastMSE) / (
|
1902 |
curComplexity - lastComplexity
|
1903 |
)
|