Spaces:
Running
Running
File size: 4,164 Bytes
cd54791 d26d668 0bf77e2 d26d668 cd54791 d26d668 cd54791 d26d668 9905c12 d26d668 cd54791 0bf77e2 cd54791 4582e28 0bf77e2 cd54791 0bf77e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
from pysr import pysr, best_row
from sklearn.base import BaseEstimator, RegressorMixin
import inspect
import pandas as pd
class PySRRegressor(BaseEstimator, RegressorMixin):
    """Scikit-learn style estimator wrapping :func:`pysr.pysr`.

    Keyword arguments given at construction time (or through
    :meth:`set_params`) are stored in ``self.params`` and forwarded to
    ``pysr`` when :meth:`fit` is called. The value returned by ``pysr`` is
    kept in ``self.equations`` (presumably a pandas DataFrame with at least
    the columns ``score``, ``Equation``, ``MSE``, ``Complexity`` and
    ``lambda_format`` -- those are the columns this class reads).
    """

    # NOTE: __init__ must keep a docstring; module-level code appends the
    # parameter documentation of pysr() to it at import time.
    def __init__(self, model_selection="accuracy", **params):
        """Initialize settings for pysr.pysr call.

        :param model_selection: How to select a model. Can be 'accuracy' or 'best'. 'best' will optimize a combination of complexity and accuracy.
        :type model_selection: str
        """
        super().__init__()
        self.model_selection = model_selection
        self.params = params

        # Stored equations:
        self.equations = None

    def __repr__(self):
        """Return the equation table, marking the selected row with ``>``.

        :raises NotImplementedError: if ``model_selection`` is neither
            'accuracy' nor 'best'.
        """
        if self.equations is None:
            return "PySRRegressor.equations=None"

        equations = self.equations

        if self.model_selection == "accuracy":
            # Same choice as get_best(): the last row.
            chosen_row = -1
        elif self.model_selection == "best":
            # Use the *position* of the maximum score. idxmax() would return
            # an index label, which is only a valid list position when the
            # frame happens to carry a default 0..n-1 index.
            chosen_row = int(equations["score"].argmax())
        else:
            raise NotImplementedError(
                f"Unknown model_selection: {self.model_selection!r}"
            )

        selected = [""] * len(equations)
        selected[chosen_row] = ">"

        repr_equations = pd.DataFrame(
            dict(
                selected=selected,
                score=equations["score"],
                Equation=equations["Equation"],
                MSE=equations["MSE"],
                Complexity=equations["Complexity"],
            )
        )
        return "PySRRegressor.equations=[\n" + repr(repr_equations) + "\n]"

    def set_params(self, **params):
        """Set parameters for pysr.pysr call or model_selection strategy.

        ``model_selection`` is stored on the estimator itself; every other
        key is stored in ``self.params`` and later forwarded to ``pysr``.

        :returns: self
        """
        for key, value in params.items():
            if key == "model_selection":
                self.model_selection = value
            else:
                # Keep model_selection out of self.params: fit() forwards
                # self.params to pysr(), and it is not a search setting.
                self.params[key] = value

        return self

    def get_params(self, deep=True):
        """Return the stored pysr parameters plus ``model_selection``.

        :param deep: Accepted for scikit-learn API compatibility; unused.
        :type deep: bool
        :returns: a new dict of parameter names to values.
        """
        del deep
        return {**self.params, "model_selection": self.model_selection}

    def get_best(self):
        """Return the equation row chosen by the ``model_selection`` strategy.

        :returns: ``0.0`` if no equations are stored yet; the last row of
            ``self.equations`` for 'accuracy'; ``best_row(self.equations)``
            for 'best'.
        :raises NotImplementedError: for any other ``model_selection``.
        """
        if self.equations is None:
            return 0.0

        if self.model_selection == "accuracy":
            return self.equations.iloc[-1]
        if self.model_selection == "best":
            return best_row(self.equations)
        raise NotImplementedError(
            f"Unknown model_selection: {self.model_selection!r}"
        )

    def fit(self, X, y, weights=None, variable_names=None):
        """Search for equations to fit the dataset.

        :param X: 2D array. Rows are examples, columns are features. If pandas DataFrame, the columns are used for variable names (so make sure they don't contain spaces).
        :type X: np.ndarray/pandas.DataFrame
        :param y: 1D array (rows are examples) or 2D array (rows are examples, columns are outputs). Putting in a 2D array will trigger a search for equations for each feature of y.
        :type y: np.ndarray
        :param weights: Optional. Same shape as y. Each element is how to weight the mean-square-error loss for that particular element of y.
        :type weights: np.ndarray
        :param variable_names: a list of names for the variables, other than "x0", "x1", etc.
        :type variable_names: list
        :returns: self
        """
        if variable_names is None:
            # Fall back to a value supplied via __init__/set_params, if any.
            variable_names = self.params.get("variable_names")

        # variable_names is passed explicitly, so drop it from the forwarded
        # keyword arguments to avoid passing it twice.
        forwarded = {
            key: value
            for key, value in self.params.items()
            if key != "variable_names"
        }

        self.equations = pysr(
            X=X,
            y=y,
            weights=weights,
            variable_names=variable_names,
            **forwarded,
        )

        return self

    def predict(self, X):
        """Evaluate the selected equation on ``X``.

        :param X: input passed unchanged to the selected row's
            ``lambda_format`` callable.
        :returns: whatever that callable returns for ``X``.
        :raises sklearn.exceptions.NotFittedError: if called before
            :meth:`fit` has stored any equations.
        """
        if self.equations is None:
            # get_best() returns 0.0 in this state, which previously surfaced
            # as an opaque "'float' object is not subscriptable" TypeError.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "This PySRRegressor instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )

        equation_row = self.get_best()
        np_format = equation_row["lambda_format"]
        return np_format(X)
# Add the docs from pysr() to PySRRegressor():
# Copy the parameter section of pysr()'s docstring -- everything from the
# ":param binary_operators:" line up to (not including) the ":returns:" line --
# and append it to the docstring of PySRRegressor.__init__.
_pysr_docstring_split = []
_start_recording = False

# inspect.getdoc() returns None when pysr() has no docstring (for example when
# docstrings are stripped with `python -OO`); fall back to an empty string so
# importing this module does not fail in that case.
for line in (inspect.getdoc(pysr) or "").split("\n"):
    # Skip docs on "X" and "y"
    if ":param binary_operators:" in line:
        _start_recording = True
    if ":returns:" in line:
        _start_recording = False

    if _start_recording:
        _pysr_docstring_split.append(line)

# Lines are joined with a newline followed by a tab.
_pysr_docstring = "\n\t".join(_pysr_docstring_split)

# __init__.__doc__ is None when docstrings are stripped; only extend a
# docstring that actually exists.
if PySRRegressor.__init__.__doc__ is not None:
    PySRRegressor.__init__.__doc__ += _pysr_docstring
|