Spaces:

MilesCranmer
/

PySR

Running

App Files Files Community

tttc3 committed on May 27, 2022

Commit

c7187a6

•

1 Parent(s): 73c6ffd

Updated tests for compatibility with refactor

Browse files

Files changed (4) hide show

pysr/sr.py +1 -1
test/test.py +21 -18
test/test_jax.py +31 -4
test/test_torch.py +39 -10

pysr/sr.py CHANGED Viewed

@@ -1029,7 +1029,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
                 ":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
                 "Will use DataFrame column names instead."
             )
             if X.columns.is_object() and X.columns.str.contains(" ").any():
                 X.columns = X.columns.str.replace(" ", "_")
                 warnings.warn(

                 ":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
                 "Will use DataFrame column names instead."
             )
             if X.columns.is_object() and X.columns.str.contains(" ").any():
                 X.columns = X.columns.str.replace(" ", "_")
                 warnings.warn(

test/test.py CHANGED Viewed

@@ -3,6 +3,7 @@ import unittest
 from unittest.mock import patch
 import numpy as np
 from pysr import PySRRegressor
 from pysr.sr import run_feature_selection, _handle_feature_selection
 import sympy
 from sympy import lambdify
@@ -21,7 +22,7 @@ class TestPipeline(unittest.TestCase):
             inspect.signature(PySRRegressor.__init__).parameters["populations"].default
         )
         self.default_test_kwargs = dict(
-            model_selection="accuracy",
             niterations=default_niterations * 2,
             populations=default_populations * 2,
         )
@@ -32,15 +33,15 @@ class TestPipeline(unittest.TestCase):
         y = self.X[:, 0]
         model = PySRRegressor(**self.default_test_kwargs)
         model.fit(self.X, y)
-        print(model.equations)
         self.assertLessEqual(model.get_best()["loss"], 1e-4)
     def test_multiprocessing(self):
         y = self.X[:, 0]
         model = PySRRegressor(**self.default_test_kwargs, procs=2, multithreading=False)
         model.fit(self.X, y)
-        print(model.equations)
-        self.assertLessEqual(model.equations.iloc[-1]["loss"], 1e-4)
     def test_multioutput_custom_operator_quiet_custom_complexity(self):
         y = self.X[:, [0, 1]] ** 2
@@ -57,9 +58,9 @@ class TestPipeline(unittest.TestCase):
             constraints={"square_op": 10},
         )
         model.fit(self.X, y)
-        equations = model.equations
         print(equations)
-        self.assertIn("square_op", model.equations[0].iloc[-1]["equation"])
         self.assertLessEqual(equations[0].iloc[-1]["loss"], 1e-4)
         self.assertLessEqual(equations[1].iloc[-1]["loss"], 1e-4)
@@ -130,14 +131,14 @@ class TestPipeline(unittest.TestCase):
         self.assertTrue("None" not in regressor.__repr__())
         self.assertTrue(">>>>" in regressor.__repr__())
-        self.assertLessEqual(regressor.equations.iloc[-1]["loss"], 1e-4)
         np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
         # Test if repeated fit works:
         regressor.set_params(niterations=0)
         regressor.fit(X, y)
-        self.assertLessEqual(regressor.equations.iloc[-1]["loss"], 1e-4)
         np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
         # Tweak model selection:
@@ -188,12 +189,11 @@ class TestPipeline(unittest.TestCase):
             unary_operators=[],
             binary_operators=["+", "*", "/", "-"],
             **self.default_test_kwargs,
-            Xresampled=Xresampled,
             denoise=True,
             select_k_features=2,
             nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}},
         )
-        model.fit(X, y)
         self.assertNotIn("unused_feature", model.latex())
         self.assertIn("T", model.latex())
         self.assertIn("x", model.latex())
@@ -232,10 +232,13 @@ class TestBest(unittest.TestCase):
             output_jax_format=False,
             model_selection="accuracy",
         )
-        self.model.n_features = 2
-        self.model.refresh()
-        self.equations = self.model.equations
         self.rstate = np.random.RandomState(0)
     def test_best(self):
         self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)
@@ -250,9 +253,9 @@ class TestBest(unittest.TestCase):
         self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")
     def test_best_lambda(self):
-        X = self.rstate.randn(10, 2)
-        y = np.cos(X[:, 0]) ** 2
-        for f in [self.model.predict, self.equations.iloc[-1]["lambda_format"]]:
             np.testing.assert_almost_equal(f(X), y, decimal=4)
@@ -292,12 +295,12 @@ class TestMiscellaneous(unittest.TestCase):
         This should give a warning, and sets the correct value.
         """
-        with self.assertWarns(UserWarning):
             model = PySRRegressor(fractionReplaced=0.2)
         # This is a deprecated parameter, so we should get a warning.
         # The correct value should be set:
-        self.assertEqual(model.params["fraction_replaced"], 0.2)
     def test_size_warning(self):
         """Ensure that a warning is given for a large input size."""

 from unittest.mock import patch
 import numpy as np
 from pysr import PySRRegressor
 from pysr.sr import run_feature_selection, _handle_feature_selection
 import sympy
 from sympy import lambdify
             inspect.signature(PySRRegressor.__init__).parameters["populations"].default
         )
         self.default_test_kwargs = dict(
+            model_selection="best",
             niterations=default_niterations * 2,
             populations=default_populations * 2,
         )
         y = self.X[:, 0]
         model = PySRRegressor(**self.default_test_kwargs)
         model.fit(self.X, y)
+        print(model.equations_)
         self.assertLessEqual(model.get_best()["loss"], 1e-4)
     def test_multiprocessing(self):
         y = self.X[:, 0]
         model = PySRRegressor(**self.default_test_kwargs, procs=2, multithreading=False)
         model.fit(self.X, y)
+        print(model.equations_)
+        self.assertLessEqual(model.equations_.iloc[-1]["loss"], 1e-4)
     def test_multioutput_custom_operator_quiet_custom_complexity(self):
         y = self.X[:, [0, 1]] ** 2
             constraints={"square_op": 10},
         )
         model.fit(self.X, y)
+        equations = model.equations_
         print(equations)
+        self.assertIn("square_op", model.equations_[0].iloc[-1]["equation"])
         self.assertLessEqual(equations[0].iloc[-1]["loss"], 1e-4)
         self.assertLessEqual(equations[1].iloc[-1]["loss"], 1e-4)
         self.assertTrue("None" not in regressor.__repr__())
         self.assertTrue(">>>>" in regressor.__repr__())
+        self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
         np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
         # Test if repeated fit works:
         regressor.set_params(niterations=0)
         regressor.fit(X, y)
+        self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
         np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
         # Tweak model selection:
             unary_operators=[],
             binary_operators=["+", "*", "/", "-"],
             **self.default_test_kwargs,
             denoise=True,
             select_k_features=2,
             nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}},
         )
+        model.fit(X, y, Xresampled=Xresampled)
         self.assertNotIn("unused_feature", model.latex())
         self.assertIn("T", model.latex())
         self.assertIn("x", model.latex())
             output_jax_format=False,
             model_selection="accuracy",
         )
         self.rstate = np.random.RandomState(0)
+        # Placeholder values needed to fit the model from an equation file
+        self.X = self.rstate.randn(10, 2)
+        self.y = np.cos(self.X[:, 0]) ** 2
+        self.model.fit(self.X, self.y, from_equation_file=True)
+        self.model.refresh()
+        self.equations_ = self.model.equations_
     def test_best(self):
         self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)
         self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")
     def test_best_lambda(self):
+        X = self.X
+        y = self.y
+        for f in [self.model.predict, self.equations_.iloc[-1]["lambda_format"]]:
             np.testing.assert_almost_equal(f(X), y, decimal=4)
         This should give a warning, and sets the correct value.
         """
+        with self.assertWarns(FutureWarning):
             model = PySRRegressor(fractionReplaced=0.2)
         # This is a deprecated parameter, so we should get a warning.
         # The correct value should be set:
+        self.assertEqual(model.fraction_replaced, 0.2)
     def test_size_warning(self):
         """Ensure that a warning is given for a large input size."""

test/test_jax.py CHANGED Viewed

@@ -4,7 +4,6 @@ from pysr import sympy2jax, PySRRegressor
 import pandas as pd
 from jax import numpy as jnp
 from jax import random
-from jax import grad
 import sympy
@@ -21,6 +20,36 @@ class TestJAX(unittest.TestCase):
         f, params = sympy2jax(cosx, [x, y, z])
         self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
     def test_pipeline(self):
         X = np.random.randn(100, 10)
         equations = pd.DataFrame(
@@ -41,9 +70,7 @@ class TestJAX(unittest.TestCase):
             variable_names="x1 x2 x3".split(" "),
         )
-        model.selection = [1, 2, 3]
-        model.n_features = 3
-        model.using_pandas = False
         model.refresh()
         jformat = model.jax()

 import pandas as pd
 from jax import numpy as jnp
 from jax import random
 import sympy
         f, params = sympy2jax(cosx, [x, y, z])
         self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
+    def test_pipeline_pandas(self):
+        X = pd.DataFrame(np.random.randn(100, 10))
+        equations = pd.DataFrame(
+            {
+                "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
+                "MSE": [1.0, 0.1, 1e-5],
+                "Complexity": [1, 2, 3],
+            }
+        )
+        equations["Complexity MSE Equation".split(" ")].to_csv(
+            "equation_file.csv.bkup", sep="|"
+        )
+        model = PySRRegressor(
+            equation_file="equation_file.csv",
+            output_jax_format=True,
+            variable_names="x1 x2 x3".split(" "),
+        )
+        model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
+        model.refresh()
+        jformat = model.jax()
+        np.testing.assert_almost_equal(
+            np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
+            np.square(np.cos(X.values[:, 1])),  # Select feature 1
+            decimal=4,
+        )
     def test_pipeline(self):
         X = np.random.randn(100, 10)
         equations = pd.DataFrame(
             variable_names="x1 x2 x3".split(" "),
         )
+        model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
         model.refresh()
         jformat = model.jax()

test/test_torch.py CHANGED Viewed

@@ -20,6 +20,40 @@ class TestTorch(unittest.TestCase):
             np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
         )
     def test_pipeline(self):
         X = np.random.randn(100, 10)
         equations = pd.DataFrame(
@@ -37,20 +71,18 @@ class TestTorch(unittest.TestCase):
         model = PySRRegressor(
             model_selection="accuracy",
             equation_file="equation_file.csv",
-            variable_names="x1 x2 x3".split(" "),
             extra_sympy_mappings={},
             output_torch_format=True,
         )
-        model.selection = [1, 2, 3]
-        model.n_features = 2  # TODO: Why is this 2 and not 3?
-        model.using_pandas = False
         model.refresh()
         tformat = model.pytorch()
         self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
         np.testing.assert_almost_equal(
             tformat(torch.tensor(X)).detach().numpy(),
-            np.square(np.cos(X[:, 1])),  # Selection 1st feature
             decimal=4,
         )
@@ -89,14 +121,11 @@ class TestTorch(unittest.TestCase):
         model = PySRRegressor(
             model_selection="accuracy",
             equation_file="equation_file_custom_operator.csv",
-            variable_names="x1 x2 x3".split(" "),
             extra_sympy_mappings={"mycustomoperator": sympy.sin},
             extra_torch_mappings={"mycustomoperator": torch.sin},
             output_torch_format=True,
         )
-        model.selection = [0, 1, 2]
-        model.n_features = 3
-        model.using_pandas = False
         model.refresh()
         self.assertEqual(str(model.sympy()), "sin(x1)")
         # Will automatically use the set global state from get_hof.
@@ -105,6 +134,6 @@ class TestTorch(unittest.TestCase):
         self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
         np.testing.assert_almost_equal(
             tformat(torch.tensor(X)).detach().numpy(),
-            np.sin(X[:, 0]),  # Selection 1st feature
             decimal=4,
         )

             np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
         )
+    def test_pipeline_pandas(self):
+        X = pd.DataFrame(np.random.randn(100, 10))
+        equations = pd.DataFrame(
+            {
+                "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
+                "MSE": [1.0, 0.1, 1e-5],
+                "Complexity": [1, 2, 3],
+            }
+        )
+        equations["Complexity MSE Equation".split(" ")].to_csv(
+            "equation_file.csv.bkup", sep="|"
+        )
+        model = PySRRegressor(
+            model_selection="accuracy",
+            equation_file="equation_file.csv",
+            extra_sympy_mappings={},
+            output_torch_format=True,
+        )
+        # Because a model hasn't been fit via the `fit` method, some
+        # attributes will not/cannot be set. For the purpose of
+        # testing, these attributes will be set manually here.
+        model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
+        model.refresh()
+        tformat = model.pytorch()
+        self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
+        np.testing.assert_almost_equal(
+            tformat(torch.tensor(X.values)).detach().numpy(),
+            np.square(np.cos(X.values[:, 1])),  # Selection 1st feature
+            decimal=4,
+        )
     def test_pipeline(self):
         X = np.random.randn(100, 10)
         equations = pd.DataFrame(
         model = PySRRegressor(
             model_selection="accuracy",
             equation_file="equation_file.csv",
             extra_sympy_mappings={},
             output_torch_format=True,
         )
+        model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
         model.refresh()
         tformat = model.pytorch()
         self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
         np.testing.assert_almost_equal(
             tformat(torch.tensor(X)).detach().numpy(),
+            np.square(np.cos(X[:, 1])),  # 2nd feature
             decimal=4,
         )
         model = PySRRegressor(
             model_selection="accuracy",
             equation_file="equation_file_custom_operator.csv",
             extra_sympy_mappings={"mycustomoperator": sympy.sin},
             extra_torch_mappings={"mycustomoperator": torch.sin},
             output_torch_format=True,
         )
+        model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
         model.refresh()
         self.assertEqual(str(model.sympy()), "sin(x1)")
         # Will automatically use the set global state from get_hof.
         self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
         np.testing.assert_almost_equal(
             tformat(torch.tensor(X)).detach().numpy(),
+            np.sin(X[:, 1]),
             decimal=4,
         )