Spaces:
Sleeping
Sleeping
MilesCranmer
committed on
Commit
•
03d5a42
1
Parent(s):
3c4243b
Attempt to fix unit tests of equation file
Browse files
- pysr/sr.py +26 -13
- test/test.py +12 -4
pysr/sr.py
CHANGED
@@ -559,6 +559,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
559 |
raw_julia_state_ : tuple[list[PyCall.jlwrap], PyCall.jlwrap]
|
560 |
The state for the julia SymbolicRegression.jl backend post fitting.
|
561 |
|
|
|
|
|
|
|
562 |
Notes
|
563 |
-----
|
564 |
Most default parameters have been tuned over several example equations,
|
@@ -959,6 +962,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
959 |
self.equation_file_ = "hall_of_fame_" + date_time + ".csv"
|
960 |
else:
|
961 |
self.equation_file_ = self.equation_file
|
|
|
962 |
|
963 |
def _validate_and_set_init_params(self):
|
964 |
"""
|
@@ -1599,6 +1603,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1599 |
check_is_fitted(self, attributes=["equation_file_"])
|
1600 |
if checkpoint_file:
|
1601 |
self.equation_file_ = checkpoint_file
|
|
|
1602 |
self.equations_ = self.get_hof()
|
1603 |
|
1604 |
def predict(self, X, index=None):
|
@@ -1771,18 +1776,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1771 |
return [eq["torch_format"] for eq in best_equation]
|
1772 |
return best_equation["torch_format"]
|
1773 |
|
1774 |
-
def
|
1775 |
-
"""
|
1776 |
-
entered, the ones used previously from a call to PySR will be used."""
|
1777 |
-
check_is_fitted(
|
1778 |
-
self,
|
1779 |
-
attributes=[
|
1780 |
-
"nout_",
|
1781 |
-
"equation_file_",
|
1782 |
-
"selection_mask_",
|
1783 |
-
"feature_names_in_",
|
1784 |
-
],
|
1785 |
-
)
|
1786 |
try:
|
1787 |
if self.nout_ > 1:
|
1788 |
all_outputs = []
|
@@ -1817,6 +1812,24 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1817 |
"Couldn't find equation file! The equation search likely exited "
|
1818 |
"before a single iteration completed."
|
1819 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1820 |
|
1821 |
# It is expected extra_jax/torch_mappings will be updated after fit.
|
1822 |
# Thus, validation is performed here instead of in _validate_init_params
|
@@ -1843,7 +1856,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1843 |
|
1844 |
ret_outputs = []
|
1845 |
|
1846 |
-
for output in
|
1847 |
|
1848 |
scores = []
|
1849 |
lastMSE = None
|
|
|
559 |
raw_julia_state_ : tuple[list[PyCall.jlwrap], PyCall.jlwrap]
|
560 |
The state for the julia SymbolicRegression.jl backend post fitting.
|
561 |
|
562 |
+
equation_file_contents_ : list[pandas.DataFrame]
|
563 |
+
Contents of the equation file output by the Julia backend.
|
564 |
+
|
565 |
Notes
|
566 |
-----
|
567 |
Most default parameters have been tuned over several example equations,
|
|
|
962 |
self.equation_file_ = "hall_of_fame_" + date_time + ".csv"
|
963 |
else:
|
964 |
self.equation_file_ = self.equation_file
|
965 |
+
self.equation_file_contents_ = None
|
966 |
|
967 |
def _validate_and_set_init_params(self):
|
968 |
"""
|
|
|
1603 |
check_is_fitted(self, attributes=["equation_file_"])
|
1604 |
if checkpoint_file:
|
1605 |
self.equation_file_ = checkpoint_file
|
1606 |
+
self.equation_file_contents_ = None
|
1607 |
self.equations_ = self.get_hof()
|
1608 |
|
1609 |
def predict(self, X, index=None):
|
|
|
1776 |
return [eq["torch_format"] for eq in best_equation]
|
1777 |
return best_equation["torch_format"]
|
1778 |
|
1779 |
+
def _read_equation_file(self):
|
1780 |
+
"""Read the hall of fame file created by SymbolicRegression.jl"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1781 |
try:
|
1782 |
if self.nout_ > 1:
|
1783 |
all_outputs = []
|
|
|
1812 |
"Couldn't find equation file! The equation search likely exited "
|
1813 |
"before a single iteration completed."
|
1814 |
)
|
1815 |
+
return all_outputs
|
1816 |
+
|
1817 |
+
def get_hof(self):
|
1818 |
+
"""Get the equations from a hall of fame file. If no arguments
|
1819 |
+
entered, the ones used previously from a call to PySR will be used."""
|
1820 |
+
check_is_fitted(
|
1821 |
+
self,
|
1822 |
+
attributes=[
|
1823 |
+
"nout_",
|
1824 |
+
"equation_file_",
|
1825 |
+
"selection_mask_",
|
1826 |
+
"feature_names_in_",
|
1827 |
+
],
|
1828 |
+
)
|
1829 |
+
if (
|
1830 |
+
not hasattr(self, "equation_file_contents_")
|
1831 |
+
) or self.equation_file_contents_ is None:
|
1832 |
+
self.equation_file_contents_ = self._read_equation_file()
|
1833 |
|
1834 |
# It is expected extra_jax/torch_mappings will be updated after fit.
|
1835 |
# Thus, validation is performed here instead of in _validate_init_params
|
|
|
1856 |
|
1857 |
ret_outputs = []
|
1858 |
|
1859 |
+
for output in self.equation_file_contents_:
|
1860 |
|
1861 |
scores = []
|
1862 |
lastMSE = None
|
test/test.py
CHANGED
@@ -115,7 +115,6 @@ class TestPipeline(unittest.TestCase):
|
|
115 |
extra_sympy_mappings={"sq": lambda x: x**2},
|
116 |
**self.default_test_kwargs,
|
117 |
procs=0,
|
118 |
-
temp_equation_file=True,
|
119 |
delete_tempfiles=False,
|
120 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 2",
|
121 |
)
|
@@ -158,8 +157,13 @@ class TestPipeline(unittest.TestCase):
|
|
158 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
159 |
|
160 |
# Test if repeated fit works:
|
161 |
-
regressor.set_params(
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
163 |
regressor.fit(X, y)
|
164 |
|
165 |
self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
|
@@ -272,7 +276,6 @@ class TestBest(unittest.TestCase):
|
|
272 |
model_selection="accuracy",
|
273 |
equation_file="equation_file.csv",
|
274 |
)
|
275 |
-
self.model.fit(self.X, self.y)
|
276 |
equations = pd.DataFrame(
|
277 |
{
|
278 |
"equation": ["1.0", "cos(x0)", "square(cos(x0))"],
|
@@ -281,6 +284,11 @@ class TestBest(unittest.TestCase):
|
|
281 |
}
|
282 |
)
|
283 |
|
|
|
|
|
|
|
|
|
|
|
284 |
equations["complexity loss equation".split(" ")].to_csv(
|
285 |
"equation_file.csv.bkup", sep="|"
|
286 |
)
|
|
|
115 |
extra_sympy_mappings={"sq": lambda x: x**2},
|
116 |
**self.default_test_kwargs,
|
117 |
procs=0,
|
|
|
118 |
delete_tempfiles=False,
|
119 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 2",
|
120 |
)
|
|
|
157 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
158 |
|
159 |
# Test if repeated fit works:
|
160 |
+
regressor.set_params(
|
161 |
+
niterations=1,
|
162 |
+
ncyclesperiteration=2,
|
163 |
+
warm_start=True,
|
164 |
+
early_stop_condition=None,
|
165 |
+
)
|
166 |
+
# This should exit almost immediately, and use the old equations
|
167 |
regressor.fit(X, y)
|
168 |
|
169 |
self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
|
|
|
276 |
model_selection="accuracy",
|
277 |
equation_file="equation_file.csv",
|
278 |
)
|
|
|
279 |
equations = pd.DataFrame(
|
280 |
{
|
281 |
"equation": ["1.0", "cos(x0)", "square(cos(x0))"],
|
|
|
284 |
}
|
285 |
)
|
286 |
|
287 |
+
# Set up internal parameters as if it had been fitted:
|
288 |
+
self.model.equation_file_ = "equation_file.csv"
|
289 |
+
self.model.nout_ = 1
|
290 |
+
self.model.selection_mask_ = None
|
291 |
+
self.model.feature_names_in_ = np.array(["x0", "x1"], dtype=object)
|
292 |
equations["complexity loss equation".split(" ")].to_csv(
|
293 |
"equation_file.csv.bkup", sep="|"
|
294 |
)
|