Commit: 03d5a42
Parent(s): 3c4243b
Committed by: MilesCranmer

Attempt to fix unit tests of equation file

Files changed:
- pysr/sr.py   +26 -13
- test/test.py +12 -4
pysr/sr.py CHANGED

@@ -559,6 +559,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     raw_julia_state_ : tuple[list[PyCall.jlwrap], PyCall.jlwrap]
         The state for the julia SymbolicRegression.jl backend post fitting.
 
+    equation_file_contents_ : list[pandas.DataFrame]
+        Contents of the equation file output by the Julia backend.
+
     Notes
     -----
     Most default parameters have been tuned over several example equations,
@@ -959,6 +962,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             self.equation_file_ = "hall_of_fame_" + date_time + ".csv"
         else:
             self.equation_file_ = self.equation_file
+        self.equation_file_contents_ = None
 
     def _validate_and_set_init_params(self):
         """
@@ -1599,6 +1603,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         check_is_fitted(self, attributes=["equation_file_"])
         if checkpoint_file:
             self.equation_file_ = checkpoint_file
+            self.equation_file_contents_ = None
         self.equations_ = self.get_hof()
 
     def predict(self, X, index=None):
@@ -1771,18 +1776,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             return [eq["torch_format"] for eq in best_equation]
         return best_equation["torch_format"]
 
-    def get_hof(self):
-        """Get the equations from a hall of fame file. If no arguments
-        entered, the ones used previously from a call to PySR will be used."""
-        check_is_fitted(
-            self,
-            attributes=[
-                "nout_",
-                "equation_file_",
-                "selection_mask_",
-                "feature_names_in_",
-            ],
-        )
+    def _read_equation_file(self):
+        """Read the hall of fame file created by SymbolicRegression.jl"""
         try:
             if self.nout_ > 1:
                 all_outputs = []
@@ -1817,6 +1812,24 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                 "Couldn't find equation file! The equation search likely exited "
                 "before a single iteration completed."
             )
+        return all_outputs
+
+    def get_hof(self):
+        """Get the equations from a hall of fame file. If no arguments
+        entered, the ones used previously from a call to PySR will be used."""
+        check_is_fitted(
+            self,
+            attributes=[
+                "nout_",
+                "equation_file_",
+                "selection_mask_",
+                "feature_names_in_",
+            ],
+        )
+        if (
+            not hasattr(self, "equation_file_contents_")
+        ) or self.equation_file_contents_ is None:
+            self.equation_file_contents_ = self._read_equation_file()
 
         # It is expected extra_jax/torch_mappings will be updated after fit.
         # Thus, validation is performed here instead of in _validate_init_params
@@ -1843,7 +1856,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
 
         ret_outputs = []
 
-        for output in all_outputs:
+        for output in self.equation_file_contents_:
 
             scores = []
             lastMSE = None
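Taken together, these changes make hall-of-fame parsing lazy: get_hof now calls the new _read_equation_file helper only when equation_file_contents_ is absent or None, and the cache is reset whenever the equation file is set up again or a new checkpoint file is loaded. Below is a minimal sketch of how this surfaces through the public API, assuming a working PySR/Julia install; the toy data and operator choices are illustrative and not part of the commit.

import numpy as np
from pysr import PySRRegressor

# Toy problem: recover y = cos(x0)^2 from data.
X = np.random.randn(100, 2)
y = np.cos(X[:, 0]) ** 2

model = PySRRegressor(
    niterations=5,
    binary_operators=["+", "*"],
    unary_operators=["cos", "square"],
)
# fit() runs the Julia search, writes the hall-of-fame CSV, and populates
# equations_ via get_hof(), which caches the parsed file in
# equation_file_contents_.
model.fit(X, y)

# Later calls are answered from the cached DataFrame instead of re-reading
# the hall-of-fame file from disk.
hof = model.get_hof()

print(model.equations_.iloc[-1]["equation"])  # highest-complexity equation found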
test/test.py CHANGED

@@ -115,7 +115,6 @@ class TestPipeline(unittest.TestCase):
             extra_sympy_mappings={"sq": lambda x: x**2},
             **self.default_test_kwargs,
             procs=0,
-            temp_equation_file=True,
             delete_tempfiles=False,
             early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 2",
         )
@@ -158,8 +157,13 @@ class TestPipeline(unittest.TestCase):
         np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
 
         # Test if repeated fit works:
-        regressor.set_params(
-
+        regressor.set_params(
+            niterations=1,
+            ncyclesperiteration=2,
+            warm_start=True,
+            early_stop_condition=None,
+        )
+        # This should exit almost immediately, and use the old equations
         regressor.fit(X, y)
 
         self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
@@ -272,7 +276,6 @@ class TestBest(unittest.TestCase):
             model_selection="accuracy",
             equation_file="equation_file.csv",
         )
-        self.model.fit(self.X, self.y)
         equations = pd.DataFrame(
             {
                 "equation": ["1.0", "cos(x0)", "square(cos(x0))"],
@@ -281,6 +284,11 @@ class TestBest(unittest.TestCase):
             }
         )
 
+        # Set up internal parameters as if it had been fitted:
+        self.model.equation_file_ = "equation_file.csv"
+        self.model.nout_ = 1
+        self.model.selection_mask_ = None
+        self.model.feature_names_in_ = np.array(["x0", "x1"], dtype=object)
         equations["complexity loss equation".split(" ")].to_csv(
             "equation_file.csv.bkup", sep="|"
         )
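The TestBest change replaces a full self.model.fit(...) run with a faked one: the test writes the backup hall-of-fame CSV itself and fills in the fitted attributes that get_hof checks through check_is_fitted. Below is a condensed sketch of that pattern outside the test harness, mirroring the file name and columns used in the test; the loss and complexity values are illustrative, and the sketch assumes get_hof accepts a file with these lowercase column names, as the test does.

import numpy as np
import pandas as pd
from pysr import PySRRegressor

# Hand-written hall of fame with the same columns the test writes out.
equations = pd.DataFrame(
    {
        "equation": ["1.0", "cos(x0)", "square(cos(x0))"],
        "loss": [1.0, 0.1, 1e-5],  # illustrative values
        "complexity": [1, 2, 3],
    }
)
equations["complexity loss equation".split(" ")].to_csv(
    "equation_file.csv.bkup", sep="|"
)

# Pretend a search already ran: set the attributes that get_hof() requires.
model = PySRRegressor(model_selection="accuracy", equation_file="equation_file.csv")
model.equation_file_ = "equation_file.csv"
model.nout_ = 1
model.selection_mask_ = None
model.feature_names_in_ = np.array(["x0", "x1"], dtype=object)

# get_hof() reads equation_file.csv.bkup, computes scores, and caches the
# parsed contents in equation_file_contents_.
model.equations_ = model.get_hof()
print(model.equations_)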