tttc3 committed on
Commit
c7187a6
1 Parent(s): 73c6ffd

Updated tests for compatibility with refactor

Browse files
Files changed (4) hide show
  1. pysr/sr.py +1 -1
  2. test/test.py +21 -18
  3. test/test_jax.py +31 -4
  4. test/test_torch.py +39 -10
pysr/sr.py CHANGED
@@ -1029,7 +1029,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
1029
  ":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
1030
  "Will use DataFrame column names instead."
1031
  )
1032
-
1033
  if X.columns.is_object() and X.columns.str.contains(" ").any():
1034
  X.columns = X.columns.str.replace(" ", "_")
1035
  warnings.warn(
 
1029
  ":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
1030
  "Will use DataFrame column names instead."
1031
  )
1032
+
1033
  if X.columns.is_object() and X.columns.str.contains(" ").any():
1034
  X.columns = X.columns.str.replace(" ", "_")
1035
  warnings.warn(
test/test.py CHANGED
@@ -3,6 +3,7 @@ import unittest
3
  from unittest.mock import patch
4
  import numpy as np
5
  from pysr import PySRRegressor
 
6
  from pysr.sr import run_feature_selection, _handle_feature_selection
7
  import sympy
8
  from sympy import lambdify
@@ -21,7 +22,7 @@ class TestPipeline(unittest.TestCase):
21
  inspect.signature(PySRRegressor.__init__).parameters["populations"].default
22
  )
23
  self.default_test_kwargs = dict(
24
- model_selection="accuracy",
25
  niterations=default_niterations * 2,
26
  populations=default_populations * 2,
27
  )
@@ -32,15 +33,15 @@ class TestPipeline(unittest.TestCase):
32
  y = self.X[:, 0]
33
  model = PySRRegressor(**self.default_test_kwargs)
34
  model.fit(self.X, y)
35
- print(model.equations)
36
  self.assertLessEqual(model.get_best()["loss"], 1e-4)
37
 
38
  def test_multiprocessing(self):
39
  y = self.X[:, 0]
40
  model = PySRRegressor(**self.default_test_kwargs, procs=2, multithreading=False)
41
  model.fit(self.X, y)
42
- print(model.equations)
43
- self.assertLessEqual(model.equations.iloc[-1]["loss"], 1e-4)
44
 
45
  def test_multioutput_custom_operator_quiet_custom_complexity(self):
46
  y = self.X[:, [0, 1]] ** 2
@@ -57,9 +58,9 @@ class TestPipeline(unittest.TestCase):
57
  constraints={"square_op": 10},
58
  )
59
  model.fit(self.X, y)
60
- equations = model.equations
61
  print(equations)
62
- self.assertIn("square_op", model.equations[0].iloc[-1]["equation"])
63
  self.assertLessEqual(equations[0].iloc[-1]["loss"], 1e-4)
64
  self.assertLessEqual(equations[1].iloc[-1]["loss"], 1e-4)
65
 
@@ -130,14 +131,14 @@ class TestPipeline(unittest.TestCase):
130
  self.assertTrue("None" not in regressor.__repr__())
131
  self.assertTrue(">>>>" in regressor.__repr__())
132
 
133
- self.assertLessEqual(regressor.equations.iloc[-1]["loss"], 1e-4)
134
  np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
135
 
136
  # Test if repeated fit works:
137
  regressor.set_params(niterations=0)
138
  regressor.fit(X, y)
139
 
140
- self.assertLessEqual(regressor.equations.iloc[-1]["loss"], 1e-4)
141
  np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
142
 
143
  # Tweak model selection:
@@ -188,12 +189,11 @@ class TestPipeline(unittest.TestCase):
188
  unary_operators=[],
189
  binary_operators=["+", "*", "/", "-"],
190
  **self.default_test_kwargs,
191
- Xresampled=Xresampled,
192
  denoise=True,
193
  select_k_features=2,
194
  nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}},
195
  )
196
- model.fit(X, y)
197
  self.assertNotIn("unused_feature", model.latex())
198
  self.assertIn("T", model.latex())
199
  self.assertIn("x", model.latex())
@@ -232,10 +232,13 @@ class TestBest(unittest.TestCase):
232
  output_jax_format=False,
233
  model_selection="accuracy",
234
  )
235
- self.model.n_features = 2
236
- self.model.refresh()
237
- self.equations = self.model.equations
238
  self.rstate = np.random.RandomState(0)
 
 
 
 
 
 
239
 
240
  def test_best(self):
241
  self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)
@@ -250,9 +253,9 @@ class TestBest(unittest.TestCase):
250
  self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")
251
 
252
  def test_best_lambda(self):
253
- X = self.rstate.randn(10, 2)
254
- y = np.cos(X[:, 0]) ** 2
255
- for f in [self.model.predict, self.equations.iloc[-1]["lambda_format"]]:
256
  np.testing.assert_almost_equal(f(X), y, decimal=4)
257
 
258
 
@@ -292,12 +295,12 @@ class TestMiscellaneous(unittest.TestCase):
292
 
293
  This should give a warning, and sets the correct value.
294
  """
295
- with self.assertWarns(UserWarning):
296
  model = PySRRegressor(fractionReplaced=0.2)
297
  # This is a deprecated parameter, so we should get a warning.
298
 
299
  # The correct value should be set:
300
- self.assertEqual(model.params["fraction_replaced"], 0.2)
301
 
302
  def test_size_warning(self):
303
  """Ensure that a warning is given for a large input size."""
 
3
  from unittest.mock import patch
4
  import numpy as np
5
  from pysr import PySRRegressor
6
+
7
  from pysr.sr import run_feature_selection, _handle_feature_selection
8
  import sympy
9
  from sympy import lambdify
 
22
  inspect.signature(PySRRegressor.__init__).parameters["populations"].default
23
  )
24
  self.default_test_kwargs = dict(
25
+ model_selection="best",
26
  niterations=default_niterations * 2,
27
  populations=default_populations * 2,
28
  )
 
33
  y = self.X[:, 0]
34
  model = PySRRegressor(**self.default_test_kwargs)
35
  model.fit(self.X, y)
36
+ print(model.equations_)
37
  self.assertLessEqual(model.get_best()["loss"], 1e-4)
38
 
39
  def test_multiprocessing(self):
40
  y = self.X[:, 0]
41
  model = PySRRegressor(**self.default_test_kwargs, procs=2, multithreading=False)
42
  model.fit(self.X, y)
43
+ print(model.equations_)
44
+ self.assertLessEqual(model.equations_.iloc[-1]["loss"], 1e-4)
45
 
46
  def test_multioutput_custom_operator_quiet_custom_complexity(self):
47
  y = self.X[:, [0, 1]] ** 2
 
58
  constraints={"square_op": 10},
59
  )
60
  model.fit(self.X, y)
61
+ equations = model.equations_
62
  print(equations)
63
+ self.assertIn("square_op", model.equations_[0].iloc[-1]["equation"])
64
  self.assertLessEqual(equations[0].iloc[-1]["loss"], 1e-4)
65
  self.assertLessEqual(equations[1].iloc[-1]["loss"], 1e-4)
66
 
 
131
  self.assertTrue("None" not in regressor.__repr__())
132
  self.assertTrue(">>>>" in regressor.__repr__())
133
 
134
+ self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
135
  np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
136
 
137
  # Test if repeated fit works:
138
  regressor.set_params(niterations=0)
139
  regressor.fit(X, y)
140
 
141
+ self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
142
  np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
143
 
144
  # Tweak model selection:
 
189
  unary_operators=[],
190
  binary_operators=["+", "*", "/", "-"],
191
  **self.default_test_kwargs,
 
192
  denoise=True,
193
  select_k_features=2,
194
  nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}},
195
  )
196
+ model.fit(X, y, Xresampled=Xresampled)
197
  self.assertNotIn("unused_feature", model.latex())
198
  self.assertIn("T", model.latex())
199
  self.assertIn("x", model.latex())
 
232
  output_jax_format=False,
233
  model_selection="accuracy",
234
  )
 
 
 
235
  self.rstate = np.random.RandomState(0)
236
+ # Placeholder values needed to fit the model from an equation file
237
+ self.X = self.rstate.randn(10, 2)
238
+ self.y = np.cos(self.X[:, 0]) ** 2
239
+ self.model.fit(self.X, self.y, from_equation_file=True)
240
+ self.model.refresh()
241
+ self.equations_ = self.model.equations_
242
 
243
  def test_best(self):
244
  self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)
 
253
  self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")
254
 
255
  def test_best_lambda(self):
256
+ X = self.X
257
+ y = self.y
258
+ for f in [self.model.predict, self.equations_.iloc[-1]["lambda_format"]]:
259
  np.testing.assert_almost_equal(f(X), y, decimal=4)
260
 
261
 
 
295
 
296
  This should give a warning, and sets the correct value.
297
  """
298
+ with self.assertWarns(FutureWarning):
299
  model = PySRRegressor(fractionReplaced=0.2)
300
  # This is a deprecated parameter, so we should get a warning.
301
 
302
  # The correct value should be set:
303
+ self.assertEqual(model.fraction_replaced, 0.2)
304
 
305
  def test_size_warning(self):
306
  """Ensure that a warning is given for a large input size."""
test/test_jax.py CHANGED
@@ -4,7 +4,6 @@ from pysr import sympy2jax, PySRRegressor
4
  import pandas as pd
5
  from jax import numpy as jnp
6
  from jax import random
7
- from jax import grad
8
  import sympy
9
 
10
 
@@ -21,6 +20,36 @@ class TestJAX(unittest.TestCase):
21
  f, params = sympy2jax(cosx, [x, y, z])
22
  self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def test_pipeline(self):
25
  X = np.random.randn(100, 10)
26
  equations = pd.DataFrame(
@@ -41,9 +70,7 @@ class TestJAX(unittest.TestCase):
41
  variable_names="x1 x2 x3".split(" "),
42
  )
43
 
44
- model.selection = [1, 2, 3]
45
- model.n_features = 3
46
- model.using_pandas = False
47
  model.refresh()
48
  jformat = model.jax()
49
 
 
4
  import pandas as pd
5
  from jax import numpy as jnp
6
  from jax import random
 
7
  import sympy
8
 
9
 
 
20
  f, params = sympy2jax(cosx, [x, y, z])
21
  self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
22
 
23
+ def test_pipeline_pandas(self):
24
+ X = pd.DataFrame(np.random.randn(100, 10))
25
+ equations = pd.DataFrame(
26
+ {
27
+ "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
28
+ "MSE": [1.0, 0.1, 1e-5],
29
+ "Complexity": [1, 2, 3],
30
+ }
31
+ )
32
+
33
+ equations["Complexity MSE Equation".split(" ")].to_csv(
34
+ "equation_file.csv.bkup", sep="|"
35
+ )
36
+
37
+ model = PySRRegressor(
38
+ equation_file="equation_file.csv",
39
+ output_jax_format=True,
40
+ variable_names="x1 x2 x3".split(" "),
41
+ )
42
+
43
+ model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
44
+ model.refresh()
45
+ jformat = model.jax()
46
+
47
+ np.testing.assert_almost_equal(
48
+ np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
49
+ np.square(np.cos(X.values[:, 1])), # Select feature 1
50
+ decimal=4,
51
+ )
52
+
53
  def test_pipeline(self):
54
  X = np.random.randn(100, 10)
55
  equations = pd.DataFrame(
 
70
  variable_names="x1 x2 x3".split(" "),
71
  )
72
 
73
+ model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
 
 
74
  model.refresh()
75
  jformat = model.jax()
76
 
test/test_torch.py CHANGED
@@ -20,6 +20,40 @@ class TestTorch(unittest.TestCase):
20
  np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
21
  )
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def test_pipeline(self):
24
  X = np.random.randn(100, 10)
25
  equations = pd.DataFrame(
@@ -37,20 +71,18 @@ class TestTorch(unittest.TestCase):
37
  model = PySRRegressor(
38
  model_selection="accuracy",
39
  equation_file="equation_file.csv",
40
- variable_names="x1 x2 x3".split(" "),
41
  extra_sympy_mappings={},
42
  output_torch_format=True,
43
  )
44
- model.selection = [1, 2, 3]
45
- model.n_features = 2 # TODO: Why is this 2 and not 3?
46
- model.using_pandas = False
47
  model.refresh()
48
 
49
  tformat = model.pytorch()
50
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
51
  np.testing.assert_almost_equal(
52
  tformat(torch.tensor(X)).detach().numpy(),
53
- np.square(np.cos(X[:, 1])), # Selection 1st feature
54
  decimal=4,
55
  )
56
 
@@ -89,14 +121,11 @@ class TestTorch(unittest.TestCase):
89
  model = PySRRegressor(
90
  model_selection="accuracy",
91
  equation_file="equation_file_custom_operator.csv",
92
- variable_names="x1 x2 x3".split(" "),
93
  extra_sympy_mappings={"mycustomoperator": sympy.sin},
94
  extra_torch_mappings={"mycustomoperator": torch.sin},
95
  output_torch_format=True,
96
  )
97
- model.selection = [0, 1, 2]
98
- model.n_features = 3
99
- model.using_pandas = False
100
  model.refresh()
101
  self.assertEqual(str(model.sympy()), "sin(x1)")
102
  # Will automatically use the set global state from get_hof.
@@ -105,6 +134,6 @@ class TestTorch(unittest.TestCase):
105
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
106
  np.testing.assert_almost_equal(
107
  tformat(torch.tensor(X)).detach().numpy(),
108
- np.sin(X[:, 0]), # Selection 1st feature
109
  decimal=4,
110
  )
 
20
  np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
21
  )
22
 
23
+ def test_pipeline_pandas(self):
24
+ X = pd.DataFrame(np.random.randn(100, 10))
25
+ equations = pd.DataFrame(
26
+ {
27
+ "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
28
+ "MSE": [1.0, 0.1, 1e-5],
29
+ "Complexity": [1, 2, 3],
30
+ }
31
+ )
32
+
33
+ equations["Complexity MSE Equation".split(" ")].to_csv(
34
+ "equation_file.csv.bkup", sep="|"
35
+ )
36
+
37
+ model = PySRRegressor(
38
+ model_selection="accuracy",
39
+ equation_file="equation_file.csv",
40
+ extra_sympy_mappings={},
41
+ output_torch_format=True,
42
+ )
43
+ # Because a model hasn't been fit via the `fit` method, some
44
+ # attributes will not/cannot be set. For the purpose of
45
+ # testing, these attributes will be set manually here.
46
+ model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
47
+ model.refresh()
48
+
49
+ tformat = model.pytorch()
50
+ self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
51
+ np.testing.assert_almost_equal(
52
+ tformat(torch.tensor(X.values)).detach().numpy(),
53
+ np.square(np.cos(X.values[:, 1])), # 2nd feature
54
+ decimal=4,
55
+ )
56
+
57
  def test_pipeline(self):
58
  X = np.random.randn(100, 10)
59
  equations = pd.DataFrame(
 
71
  model = PySRRegressor(
72
  model_selection="accuracy",
73
  equation_file="equation_file.csv",
 
74
  extra_sympy_mappings={},
75
  output_torch_format=True,
76
  )
77
+
78
+ model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
 
79
  model.refresh()
80
 
81
  tformat = model.pytorch()
82
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
83
  np.testing.assert_almost_equal(
84
  tformat(torch.tensor(X)).detach().numpy(),
85
+ np.square(np.cos(X[:, 1])), # 2nd feature
86
  decimal=4,
87
  )
88
 
 
121
  model = PySRRegressor(
122
  model_selection="accuracy",
123
  equation_file="equation_file_custom_operator.csv",
 
124
  extra_sympy_mappings={"mycustomoperator": sympy.sin},
125
  extra_torch_mappings={"mycustomoperator": torch.sin},
126
  output_torch_format=True,
127
  )
128
+ model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
 
 
129
  model.refresh()
130
  self.assertEqual(str(model.sympy()), "sin(x1)")
131
  # Will automatically use the set global state from get_hof.
 
134
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
135
  np.testing.assert_almost_equal(
136
  tformat(torch.tensor(X)).detach().numpy(),
137
+ np.sin(X[:, 1]),
138
  decimal=4,
139
  )