Spaces:
Sleeping
Sleeping
MilesCranmer
committed on
Commit
•
874bbe6
1
Parent(s):
891ed86
Fix docs style issues
Browse files- pysr/sr.py +27 -19
pysr/sr.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import copy
|
2 |
import os
|
3 |
import sys
|
@@ -879,7 +880,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
879 |
|
880 |
def __repr__(self):
|
881 |
"""
|
882 |
-
|
883 |
|
884 |
The string `>>>>` denotes which equation is selected by the
|
885 |
`model_selection`.
|
@@ -926,7 +927,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
926 |
|
927 |
def __getstate__(self):
|
928 |
"""
|
929 |
-
|
930 |
|
931 |
The Scikit-learn standard requires estimators to be serializable via
|
932 |
`pickle.dumps()`. However, `PyCall.jlwrap` does not support pickle
|
@@ -988,9 +989,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
988 |
return pickled_state
|
989 |
|
990 |
def _checkpoint(self):
|
991 |
-
"""
|
992 |
|
993 |
-
This should only be used internally by PySRRegressor.
|
|
|
994 |
# Save model state:
|
995 |
self.show_pickle_warnings_ = False
|
996 |
with open(_csv_filename_to_pkl_filename(self.equation_file_), "wb") as f:
|
@@ -1051,7 +1053,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1051 |
|
1052 |
def _setup_equation_file(self):
|
1053 |
"""
|
1054 |
-
|
|
|
|
|
1055 |
`equation_file`.
|
1056 |
"""
|
1057 |
# Cast tempdir string as a Path object
|
@@ -1072,7 +1076,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1072 |
|
1073 |
def _validate_and_set_init_params(self):
|
1074 |
"""
|
1075 |
-
|
1076 |
|
1077 |
Also returns a dictionary of parameters to update from their
|
1078 |
values given at initialization.
|
@@ -1171,7 +1175,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1171 |
|
1172 |
def _validate_and_set_fit_params(self, X, y, Xresampled, weights, variable_names):
|
1173 |
"""
|
1174 |
-
|
1175 |
|
1176 |
This method also sets the `nout_` attribute.
|
1177 |
|
@@ -1257,7 +1261,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1257 |
self, X, y, Xresampled, variable_names, random_state
|
1258 |
):
|
1259 |
"""
|
1260 |
-
|
1261 |
|
1262 |
This method also updates/sets the `selection_mask_` attribute.
|
1263 |
|
@@ -1712,8 +1716,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1712 |
|
1713 |
def refresh(self, checkpoint_file=None):
|
1714 |
"""
|
1715 |
-
|
1716 |
-
|
|
|
|
|
1717 |
|
1718 |
Parameters
|
1719 |
----------
|
@@ -1916,7 +1922,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1916 |
return best_equation["torch_format"]
|
1917 |
|
1918 |
def _read_equation_file(self):
|
1919 |
-
"""Read the hall of fame file created by SymbolicRegression.jl"""
|
1920 |
try:
|
1921 |
if self.nout_ > 1:
|
1922 |
all_outputs = []
|
@@ -1957,8 +1963,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1957 |
return all_outputs
|
1958 |
|
1959 |
def get_hof(self):
|
1960 |
-
"""Get the equations from a hall of fame file.
|
1961 |
-
|
|
|
|
|
|
|
1962 |
check_is_fitted(
|
1963 |
self,
|
1964 |
attributes=[
|
@@ -2159,10 +2168,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
2159 |
|
2160 |
|
2161 |
def idx_model_selection(equations: pd.DataFrame, model_selection: str) -> int:
|
2162 |
-
"""
|
2163 |
-
Return the index of the selected expression, given a dataframe of
|
2164 |
-
equations and a model selection.
|
2165 |
-
"""
|
2166 |
if model_selection == "accuracy":
|
2167 |
chosen_idx = equations["loss"].idxmin()
|
2168 |
elif model_selection == "best":
|
@@ -2179,7 +2185,7 @@ def idx_model_selection(equations: pd.DataFrame, model_selection: str) -> int:
|
|
2179 |
|
2180 |
|
2181 |
def _denoise(X, y, Xresampled=None, random_state=None):
|
2182 |
-
"""Denoise the dataset using a Gaussian process"""
|
2183 |
from sklearn.gaussian_process import GaussianProcessRegressor
|
2184 |
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel
|
2185 |
|
@@ -2208,7 +2214,9 @@ def _handle_feature_selection(X, select_k_features, y, variable_names):
|
|
2208 |
|
2209 |
def run_feature_selection(X, y, select_k_features, random_state=None):
|
2210 |
"""
|
2211 |
-
|
|
|
|
|
2212 |
the k most important features in X, returning indices for those
|
2213 |
features as output.
|
2214 |
"""
|
|
|
1 |
+
"""Defines the PySRRegressor scikit-learn interface."""
|
2 |
import copy
|
3 |
import os
|
4 |
import sys
|
|
|
880 |
|
881 |
def __repr__(self):
|
882 |
"""
|
883 |
+
Print all current equations fitted by the model.
|
884 |
|
885 |
The string `>>>>` denotes which equation is selected by the
|
886 |
`model_selection`.
|
|
|
927 |
|
928 |
def __getstate__(self):
|
929 |
"""
|
930 |
+
Handle pickle serialization for PySRRegressor.
|
931 |
|
932 |
The Scikit-learn standard requires estimators to be serializable via
|
933 |
`pickle.dumps()`. However, `PyCall.jlwrap` does not support pickle
|
|
|
989 |
return pickled_state
|
990 |
|
991 |
def _checkpoint(self):
|
992 |
+
"""Save the model's current state to a checkpoint file.
|
993 |
|
994 |
+
This should only be used internally by PySRRegressor.
|
995 |
+
"""
|
996 |
# Save model state:
|
997 |
self.show_pickle_warnings_ = False
|
998 |
with open(_csv_filename_to_pkl_filename(self.equation_file_), "wb") as f:
|
|
|
1053 |
|
1054 |
def _setup_equation_file(self):
|
1055 |
"""
|
1056 |
+
Set the full pathname of the equation file.
|
1057 |
+
|
1058 |
+
This is performed using `tempdir` and
|
1059 |
`equation_file`.
|
1060 |
"""
|
1061 |
# Cast tempdir string as a Path object
|
|
|
1076 |
|
1077 |
def _validate_and_set_init_params(self):
|
1078 |
"""
|
1079 |
+
Ensure parameters passed at initialization are valid.
|
1080 |
|
1081 |
Also returns a dictionary of parameters to update from their
|
1082 |
values given at initialization.
|
|
|
1175 |
|
1176 |
def _validate_and_set_fit_params(self, X, y, Xresampled, weights, variable_names):
|
1177 |
"""
|
1178 |
+
Validate the parameters passed to the :term:`fit` method.
|
1179 |
|
1180 |
This method also sets the `nout_` attribute.
|
1181 |
|
|
|
1261 |
self, X, y, Xresampled, variable_names, random_state
|
1262 |
):
|
1263 |
"""
|
1264 |
+
Transform the training data before fitting the symbolic regressor.
|
1265 |
|
1266 |
This method also updates/sets the `selection_mask_` attribute.
|
1267 |
|
|
|
1716 |
|
1717 |
def refresh(self, checkpoint_file=None):
|
1718 |
"""
|
1719 |
+
Update self.equations_ with any new options passed.
|
1720 |
+
|
1721 |
+
For example, updating `extra_sympy_mappings`
|
1722 |
+
will require a `.refresh()` to update the equations.
|
1723 |
|
1724 |
Parameters
|
1725 |
----------
|
|
|
1922 |
return best_equation["torch_format"]
|
1923 |
|
1924 |
def _read_equation_file(self):
|
1925 |
+
"""Read the hall of fame file created by `SymbolicRegression.jl`."""
|
1926 |
try:
|
1927 |
if self.nout_ > 1:
|
1928 |
all_outputs = []
|
|
|
1963 |
return all_outputs
|
1964 |
|
1965 |
def get_hof(self):
|
1966 |
+
"""Get the equations from a hall of fame file.
|
1967 |
+
|
1968 |
+
If no arguments entered, the ones used
|
1969 |
+
previously from a call to PySR will be used.
|
1970 |
+
"""
|
1971 |
check_is_fitted(
|
1972 |
self,
|
1973 |
attributes=[
|
|
|
2168 |
|
2169 |
|
2170 |
def idx_model_selection(equations: pd.DataFrame, model_selection: str) -> int:
|
2171 |
+
"""Select an expression and return its index."""
|
|
|
|
|
|
|
2172 |
if model_selection == "accuracy":
|
2173 |
chosen_idx = equations["loss"].idxmin()
|
2174 |
elif model_selection == "best":
|
|
|
2185 |
|
2186 |
|
2187 |
def _denoise(X, y, Xresampled=None, random_state=None):
|
2188 |
+
"""Denoise the dataset using a Gaussian process."""
|
2189 |
from sklearn.gaussian_process import GaussianProcessRegressor
|
2190 |
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel
|
2191 |
|
|
|
2214 |
|
2215 |
def run_feature_selection(X, y, select_k_features, random_state=None):
|
2216 |
"""
|
2217 |
+
Find most important features.
|
2218 |
+
|
2219 |
+
Uses a gradient boosting tree regressor as a proxy for finding
|
2220 |
the k most important features in X, returning indices for those
|
2221 |
features as output.
|
2222 |
"""
|