MilesCranmer committed on
Commit
c3dc203
2 Parent(s): 5bbefa6 ad1c492

Merge pull request #146 from tttc3/refactor-PySRRegressor

Browse files
pysr/__init__.py CHANGED
@@ -6,8 +6,8 @@ from .sr import (
6
  best_tex,
7
  best_callable,
8
  best_row,
9
- install,
10
  )
 
11
  from .feynman_problems import Problem, FeynmanProblem
12
  from .export_jax import sympy2jax
13
  from .export_torch import sympy2torch
 
6
  best_tex,
7
  best_callable,
8
  best_row,
 
9
  )
10
+ from .julia_helpers import install
11
  from .feynman_problems import Problem, FeynmanProblem
12
  from .export_jax import sympy2jax
13
  from .export_torch import sympy2torch
pysr/export_numpy.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Code for exporting discovered expressions to numpy"""
2
+ import numpy as np
3
+ import pandas as pd
4
+ from sympy import lambdify
5
+ import warnings
6
+
7
+
8
class CallableEquation:
    """Callable object that evaluates a sympy expression on tabular input.

    The expression is turned into a function with ``sympy.lambdify`` and
    applied to the columns of ``X`` whenever the instance is called.

    Parameters
    ----------
    sympy_symbols
        Symbols handed to ``lambdify`` as the argument list of the function.
    eqn
        The sympy expression to evaluate.
    selection
        Optional column indices; used to filter array input whose number of
        columns differs from ``len(selection)``.
    variable_names
        Column names looked up when ``X`` is a ``pandas.DataFrame``.
    """

    def __init__(self, sympy_symbols, eqn, selection=None, variable_names=None):
        self._variable_names = variable_names
        self._selection = selection
        self._sympy_symbols = sympy_symbols
        self._sympy = eqn

    def __repr__(self):
        return f"PySRFunction(X=>{self._sympy})"

    def __call__(self, X):
        """Evaluate the expression for every row of ``X``.

        The result is multiplied by a vector of ones with one entry per row,
        so a scalar result is broadcast to shape ``(X.shape[0],)``.
        """
        out_shape = (X.shape[0],)

        if isinstance(X, pd.DataFrame):
            # DataFrame input: feed each named column as a keyword argument.
            evaluate = self._lambda
            columns_by_name = {
                name: X[name].values for name in self._variable_names
            }
            return evaluate(**columns_by_name) * np.ones(out_shape)

        # Array input: only filter when the column count does not already
        # match the stored selection.
        column_count_mismatch = self._selection is not None and X.shape[
            1
        ] != len(self._selection)
        if column_count_mismatch:
            warnings.warn(
                "`X` should be of shape (n_samples, len(self._selection)). "
                "Automatically filtering `X` to selection. "
                "Note: Filtered `X` column order may not match column order in fit "
                "this may lead to incorrect predictions and other errors."
            )
            X = X[:, self._selection]

        # Each column of X becomes one positional argument.
        return self._lambda(*X.T) * np.ones(out_shape)

    @property
    def _lambda(self):
        """Function built by ``lambdify``; rebuilt on every access."""
        return lambdify(self._sympy_symbols, self._sympy)
pysr/julia_helpers.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Functions for initializing the Julia environment and installing deps."""
2
+ import warnings
3
+ from pathlib import Path
4
+ import os
5
+
6
+ from .version import __version__, __symbolic_regression_jl_version__
7
+
8
+
9
def install(julia_project=None, quiet=False):  # pragma: no cover
    """
    Install PyCall.jl and all required dependencies for SymbolicRegression.jl.

    Also updates the local Julia registry.

    Parameters
    ----------
    julia_project
        Julia project to activate. ``None`` selects the shared environment
        resolved by ``_get_julia_project``, into which SymbolicRegression.jl
        is then added.
    quiet
        Forwarded to ``julia.install``. When true, Pkg output is sent to
        ``devnull`` (on Julia >= 1.6) and the restart warning is not issued.
    """
    import julia

    julia.install(quiet=quiet)

    julia_project, is_shared = _get_julia_project(julia_project)

    Main = init_julia()
    Main.eval("using Pkg")

    # The `io` keyword is only passed to Pkg on Julia >= 1.6.
    if is_julia_version_greater_eq(Main, "1.6"):
        io_arg = "io=devnull" if quiet else "io=stderr"
    else:
        io_arg = ""

    # Can't pass IO to Julia call as it evaluates to PyObject, so just directly
    # use Main.eval:
    project_literal = _escape_filename(julia_project)
    shared_flag = int(is_shared)
    Main.eval(
        f'Pkg.activate("{project_literal}", shared = Bool({shared_flag}), {io_arg})'
    )

    if is_shared:
        # Install SymbolicRegression.jl:
        _add_sr_to_julia_project(Main, io_arg)

    Main.eval(f"Pkg.instantiate({io_arg})")
    Main.eval(f"Pkg.precompile({io_arg})")

    if quiet:
        return
    warnings.warn(
        "It is recommended to restart Python after installing PySR's dependencies,"
        " so that the Julia environment is properly initialized."
    )
43
+
44
+
45
def import_error_string(julia_project=None):
    """Return the message explaining how to install the missing dependencies.

    Parameters
    ----------
    julia_project
        When not ``None``, a line naming the project whose activation failed
        is appended to the message.
    """
    message = """
Required dependencies are not installed or built. Run the following code in the Python REPL:

>>> import pysr
>>> pysr.install()
"""

    if julia_project is None:
        return message

    # Mention the project that could not be activated.
    return (
        message
        + f"""
Tried to activate project {julia_project} but failed."""
    )
58
+
59
+
60
def _get_julia_project(julia_project):
    """Resolve the Julia project to use and whether it is a shared one.

    Returns a ``(julia_project, is_shared)`` tuple. With ``None`` the project
    is the name ``pysr-<version>`` and ``is_shared`` is true; otherwise the
    given value is wrapped in ``Path`` and ``is_shared`` is false.
    """
    is_shared = julia_project is None
    if is_shared:
        julia_project = f"pysr-{__version__}"
    else:
        julia_project = Path(julia_project)
    return julia_project, is_shared
68
+
69
+
70
def is_julia_version_greater_eq(Main, version="1.6"):
    """Check if the Julia version is greater than or equal to `version`.

    The comparison ``VERSION >= v"<version>"`` is evaluated through
    ``Main.eval`` and its result is returned unchanged.
    """
    comparison = f'VERSION >= v"{version}"'
    return Main.eval(comparison)
73
+
74
+
75
def init_julia():
    """Initialize julia binary, turning off compiled modules if needed.

    Returns
    -------
    The ``Main`` module imported from the ``julia`` package.

    Raises
    ------
    FileNotFoundError
        If ``JuliaInfo.load`` cannot find the ``julia`` executable; the
        message includes the current ``PATH``.
    ImportError
        If PyCall is reported as not built.
    """
    from julia.core import JuliaInfo, UnsupportedPythonError

    try:
        info = JuliaInfo.load(julia="julia")
    except FileNotFoundError:
        current_path = os.environ["PATH"]
        raise FileNotFoundError(
            f"Julia is not installed in your PATH. Please install Julia and add it to your PATH.\n\nCurrent PATH: {current_path}",
        )

    if not info.is_pycall_built():
        raise ImportError(import_error_string())

    try:
        from julia import Main
    except UnsupportedPythonError:
        # Static python binary, so we turn off pre-compiled modules.
        from julia.core import Julia

        # Constructed for its side effect only; the import is then retried.
        Julia(compiled_modules=False)
        from julia import Main

    return Main
105
+
106
+
107
def _add_sr_to_julia_project(Main, io_arg):
    """Add SymbolicRegression.jl and ClusterManagers.jl to the active project.

    For each package a ``PackageSpec`` is stored as an attribute on ``Main``
    and then installed with ``Pkg.add`` through ``Main.eval``. ``io_arg`` is
    inserted as the trailing argument of each ``Pkg.add`` call.
    """
    package_specs = (
        (
            "sr_spec",
            dict(
                name="SymbolicRegression",
                url="https://github.com/MilesCranmer/SymbolicRegression.jl",
                rev="v" + __symbolic_regression_jl_version__,
            ),
        ),
        (
            "clustermanagers_spec",
            dict(
                name="ClusterManagers",
                url="https://github.com/JuliaParallel/ClusterManagers.jl",
                rev="14e7302f068794099344d5d93f71979aaf4fbeb3",
            ),
        ),
    )

    # Define each spec on Main first, so the evaluated Julia code can refer
    # to it by name.
    for spec_name, spec_fields in package_specs:
        setattr(Main, spec_name, Main.PackageSpec(**spec_fields))
        Main.eval(f"Pkg.add({spec_name}, {io_arg})")
120
+
121
+
122
def _escape_filename(filename):
    """Return ``str(filename)`` with every backslash doubled."""
    return str(filename).replace("\\", "\\\\")
pysr/sr.py CHANGED
The diff for this file is too large to render. See raw diff
 
pysr/version.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = "0.8.7"
2
- __symbolic_regression_jl_version__ = "0.9.4"
 
1
+ __version__ = "0.9.0"
2
+ __symbolic_regression_jl_version__ = "0.9.6"
test/test.py CHANGED
@@ -1,11 +1,11 @@
1
  import inspect
2
  import unittest
3
- from unittest.mock import patch
4
  import numpy as np
 
5
  from pysr import PySRRegressor
6
  from pysr.sr import run_feature_selection, _handle_feature_selection
 
7
  import sympy
8
- from sympy import lambdify
9
  import pandas as pd
10
  import warnings
11
 
@@ -21,6 +21,7 @@ class TestPipeline(unittest.TestCase):
21
  inspect.signature(PySRRegressor.__init__).parameters["populations"].default
22
  )
23
  self.default_test_kwargs = dict(
 
24
  model_selection="accuracy",
25
  niterations=default_niterations * 2,
26
  populations=default_populations * 2,
@@ -30,17 +31,25 @@ class TestPipeline(unittest.TestCase):
30
 
31
  def test_linear_relation(self):
32
  y = self.X[:, 0]
33
- model = PySRRegressor(**self.default_test_kwargs)
 
 
 
34
  model.fit(self.X, y)
35
- print(model.equations)
36
  self.assertLessEqual(model.get_best()["loss"], 1e-4)
37
 
38
  def test_multiprocessing(self):
39
  y = self.X[:, 0]
40
- model = PySRRegressor(**self.default_test_kwargs, procs=2, multithreading=False)
 
 
 
 
 
41
  model.fit(self.X, y)
42
- print(model.equations)
43
- self.assertLessEqual(model.equations.iloc[-1]["loss"], 1e-4)
44
 
45
  def test_multioutput_custom_operator_quiet_custom_complexity(self):
46
  y = self.X[:, [0, 1]] ** 2
@@ -55,11 +64,12 @@ class TestPipeline(unittest.TestCase):
55
  # Test custom operators with constraints:
56
  nested_constraints={"square_op": {"square_op": 3}},
57
  constraints={"square_op": 10},
 
58
  )
59
  model.fit(self.X, y)
60
- equations = model.equations
61
  print(equations)
62
- self.assertIn("square_op", model.equations[0].iloc[-1]["equation"])
63
  self.assertLessEqual(equations[0].iloc[-1]["loss"], 1e-4)
64
  self.assertLessEqual(equations[1].iloc[-1]["loss"], 1e-4)
65
 
@@ -95,6 +105,7 @@ class TestPipeline(unittest.TestCase):
95
  procs=0,
96
  temp_equation_file=True,
97
  delete_tempfiles=False,
 
98
  )
99
  model.fit(X.copy(), y, weights=w)
100
 
@@ -117,27 +128,29 @@ class TestPipeline(unittest.TestCase):
117
  print("Model equations: ", model.sympy()[1])
118
  print("True equation: x1^2")
119
 
120
- def test_empty_operators_single_input_multirun(self):
121
  X = self.rstate.randn(100, 1)
122
  y = X[:, 0] + 3.0
123
  regressor = PySRRegressor(
124
  unary_operators=[],
125
  binary_operators=["plus"],
126
  **self.default_test_kwargs,
 
127
  )
128
  self.assertTrue("None" in regressor.__repr__())
129
  regressor.fit(X, y)
130
  self.assertTrue("None" not in regressor.__repr__())
131
  self.assertTrue(">>>>" in regressor.__repr__())
132
 
133
- self.assertLessEqual(regressor.equations.iloc[-1]["loss"], 1e-4)
134
  np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
135
 
136
  # Test if repeated fit works:
137
- regressor.set_params(niterations=0)
 
138
  regressor.fit(X, y)
139
 
140
- self.assertLessEqual(regressor.equations.iloc[-1]["loss"], 1e-4)
141
  np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
142
 
143
  # Tweak model selection:
@@ -157,7 +170,11 @@ class TestPipeline(unittest.TestCase):
157
  **self.default_test_kwargs,
158
  procs=0,
159
  denoise=True,
 
160
  )
 
 
 
161
  model.fit(self.X, y)
162
  self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
163
  self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
@@ -188,11 +205,11 @@ class TestPipeline(unittest.TestCase):
188
  unary_operators=[],
189
  binary_operators=["+", "*", "/", "-"],
190
  **self.default_test_kwargs,
191
- Xresampled=Xresampled,
192
  denoise=True,
193
  nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}},
 
194
  )
195
- model.fit(X, y)
196
  self.assertNotIn("unused_feature", model.latex())
197
  self.assertIn("T", model.latex())
198
  self.assertIn("x", model.latex())
@@ -217,18 +234,31 @@ class TestPipeline(unittest.TestCase):
217
  unary_operators=["cos"],
218
  select_k_features=3,
219
  early_stop_condition=1e-4, # Stop once most accurate equation is <1e-4 MSE
220
- Xresampled=Xresampled,
221
  maxsize=12,
222
  **self.default_test_kwargs,
223
  )
224
- model.fit(X, y)
225
  model.set_params(model_selection="accuracy")
226
- model.predict(X)
227
  self.assertLess(np.average((model.predict(X) - y) ** 2), 1e-4)
 
 
 
228
 
229
 
230
  class TestBest(unittest.TestCase):
231
  def setUp(self):
 
 
 
 
 
 
 
 
 
 
 
 
232
  equations = pd.DataFrame(
233
  {
234
  "equation": ["1.0", "cos(x0)", "square(cos(x0))"],
@@ -241,17 +271,8 @@ class TestBest(unittest.TestCase):
241
  "equation_file.csv.bkup", sep="|"
242
  )
243
 
244
- self.model = PySRRegressor(
245
- equation_file="equation_file.csv",
246
- variable_names="x0 x1".split(" "),
247
- extra_sympy_mappings={},
248
- output_jax_format=False,
249
- model_selection="accuracy",
250
- )
251
- self.model.n_features = 2
252
  self.model.refresh()
253
- self.equations = self.model.equations
254
- self.rstate = np.random.RandomState(0)
255
 
256
  def test_best(self):
257
  self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)
@@ -266,9 +287,9 @@ class TestBest(unittest.TestCase):
266
  self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")
267
 
268
  def test_best_lambda(self):
269
- X = self.rstate.randn(10, 2)
270
- y = np.cos(X[:, 0]) ** 2
271
- for f in [self.model.predict, self.equations.iloc[-1]["lambda_format"]]:
272
  np.testing.assert_almost_equal(f(X), y, decimal=4)
273
 
274
 
@@ -308,12 +329,12 @@ class TestMiscellaneous(unittest.TestCase):
308
 
309
  This should give a warning, and sets the correct value.
310
  """
311
- with self.assertWarns(UserWarning):
312
  model = PySRRegressor(fractionReplaced=0.2)
313
  # This is a deprecated parameter, so we should get a warning.
314
 
315
  # The correct value should be set:
316
- self.assertEqual(model.params["fraction_replaced"], 0.2)
317
 
318
  def test_size_warning(self):
319
  """Ensure that a warning is given for a large input size."""
@@ -336,3 +357,59 @@ class TestMiscellaneous(unittest.TestCase):
336
  with self.assertRaises(Exception) as context:
337
  model.fit(X, y)
338
  self.assertIn("with 10 features or more", str(context.exception))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import inspect
2
  import unittest
 
3
  import numpy as np
4
+ from sklearn import model_selection
5
  from pysr import PySRRegressor
6
  from pysr.sr import run_feature_selection, _handle_feature_selection
7
+ from sklearn.utils.estimator_checks import check_estimator
8
  import sympy
 
9
  import pandas as pd
10
  import warnings
11
 
 
21
  inspect.signature(PySRRegressor.__init__).parameters["populations"].default
22
  )
23
  self.default_test_kwargs = dict(
24
+ progress=False,
25
  model_selection="accuracy",
26
  niterations=default_niterations * 2,
27
  populations=default_populations * 2,
 
31
 
32
  def test_linear_relation(self):
33
  y = self.X[:, 0]
34
+ model = PySRRegressor(
35
+ **self.default_test_kwargs,
36
+ early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
37
+ )
38
  model.fit(self.X, y)
39
+ print(model.equations_)
40
  self.assertLessEqual(model.get_best()["loss"], 1e-4)
41
 
42
  def test_multiprocessing(self):
43
  y = self.X[:, 0]
44
+ model = PySRRegressor(
45
+ **self.default_test_kwargs,
46
+ procs=2,
47
+ multithreading=False,
48
+ early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
49
+ )
50
  model.fit(self.X, y)
51
+ print(model.equations_)
52
+ self.assertLessEqual(model.equations_.iloc[-1]["loss"], 1e-4)
53
 
54
  def test_multioutput_custom_operator_quiet_custom_complexity(self):
55
  y = self.X[:, [0, 1]] ** 2
 
64
  # Test custom operators with constraints:
65
  nested_constraints={"square_op": {"square_op": 3}},
66
  constraints={"square_op": 10},
67
+ early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 3",
68
  )
69
  model.fit(self.X, y)
70
+ equations = model.equations_
71
  print(equations)
72
+ self.assertIn("square_op", model.equations_[0].iloc[-1]["equation"])
73
  self.assertLessEqual(equations[0].iloc[-1]["loss"], 1e-4)
74
  self.assertLessEqual(equations[1].iloc[-1]["loss"], 1e-4)
75
 
 
105
  procs=0,
106
  temp_equation_file=True,
107
  delete_tempfiles=False,
108
+ early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 2",
109
  )
110
  model.fit(X.copy(), y, weights=w)
111
 
 
128
  print("Model equations: ", model.sympy()[1])
129
  print("True equation: x1^2")
130
 
131
+ def test_empty_operators_single_input_warm_start(self):
132
  X = self.rstate.randn(100, 1)
133
  y = X[:, 0] + 3.0
134
  regressor = PySRRegressor(
135
  unary_operators=[],
136
  binary_operators=["plus"],
137
  **self.default_test_kwargs,
138
+ early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 3",
139
  )
140
  self.assertTrue("None" in regressor.__repr__())
141
  regressor.fit(X, y)
142
  self.assertTrue("None" not in regressor.__repr__())
143
  self.assertTrue(">>>>" in regressor.__repr__())
144
 
145
+ self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
146
  np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
147
 
148
  # Test if repeated fit works:
149
+ regressor.set_params(niterations=0, warm_start=True, early_stop_condition=None)
150
+ # This should exit immediately, and use the old equations
151
  regressor.fit(X, y)
152
 
153
+ self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
154
  np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
155
 
156
  # Tweak model selection:
 
170
  **self.default_test_kwargs,
171
  procs=0,
172
  denoise=True,
173
+ early_stop_condition="stop_if(loss, complexity) = loss < 0.05 && complexity == 2",
174
  )
175
+ # We expect in this case that the "best"
176
+ # equation should be the right one:
177
+ model.set_params(model_selection="best")
178
  model.fit(self.X, y)
179
  self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
180
  self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
 
205
  unary_operators=[],
206
  binary_operators=["+", "*", "/", "-"],
207
  **self.default_test_kwargs,
 
208
  denoise=True,
209
  nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}},
210
+ early_stop_condition="stop_if(loss, complexity) = loss < 1e-3 && complexity == 7",
211
  )
212
+ model.fit(X, y, Xresampled=Xresampled)
213
  self.assertNotIn("unused_feature", model.latex())
214
  self.assertIn("T", model.latex())
215
  self.assertIn("x", model.latex())
 
234
  unary_operators=["cos"],
235
  select_k_features=3,
236
  early_stop_condition=1e-4, # Stop once most accurate equation is <1e-4 MSE
 
237
  maxsize=12,
238
  **self.default_test_kwargs,
239
  )
 
240
  model.set_params(model_selection="accuracy")
241
+ model.fit(X, y, Xresampled=Xresampled)
242
  self.assertLess(np.average((model.predict(X) - y) ** 2), 1e-4)
243
+ # Again, but with numpy arrays:
244
+ model.fit(X.values, y.values, Xresampled=Xresampled.values)
245
+ self.assertLess(np.average((model.predict(X.values) - y.values) ** 2), 1e-4)
246
 
247
 
248
  class TestBest(unittest.TestCase):
249
  def setUp(self):
250
+ self.rstate = np.random.RandomState(0)
251
+ self.X = self.rstate.randn(10, 2)
252
+ self.y = np.cos(self.X[:, 0]) ** 2
253
+ self.model = PySRRegressor(
254
+ progress=False,
255
+ niterations=1,
256
+ extra_sympy_mappings={},
257
+ output_jax_format=False,
258
+ model_selection="accuracy",
259
+ equation_file="equation_file.csv",
260
+ )
261
+ self.model.fit(self.X, self.y)
262
  equations = pd.DataFrame(
263
  {
264
  "equation": ["1.0", "cos(x0)", "square(cos(x0))"],
 
271
  "equation_file.csv.bkup", sep="|"
272
  )
273
 
 
 
 
 
 
 
 
 
274
  self.model.refresh()
275
+ self.equations_ = self.model.equations_
 
276
 
277
  def test_best(self):
278
  self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)
 
287
  self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")
288
 
289
  def test_best_lambda(self):
290
+ X = self.X
291
+ y = self.y
292
+ for f in [self.model.predict, self.equations_.iloc[-1]["lambda_format"]]:
293
  np.testing.assert_almost_equal(f(X), y, decimal=4)
294
 
295
 
 
329
 
330
  This should give a warning, and sets the correct value.
331
  """
332
+ with self.assertWarns(FutureWarning):
333
  model = PySRRegressor(fractionReplaced=0.2)
334
  # This is a deprecated parameter, so we should get a warning.
335
 
336
  # The correct value should be set:
337
+ self.assertEqual(model.fraction_replaced, 0.2)
338
 
339
  def test_size_warning(self):
340
  """Ensure that a warning is given for a large input size."""
 
357
  with self.assertRaises(Exception) as context:
358
  model.fit(X, y)
359
  self.assertIn("with 10 features or more", str(context.exception))
360
+
361
+ def test_deterministic_warnings(self):
362
+ """Ensure that warnings are given for determinism"""
363
+ model = PySRRegressor(random_state=0)
364
+ X = np.random.randn(100, 2)
365
+ y = np.random.randn(100)
366
+ with warnings.catch_warnings():
367
+ warnings.simplefilter("error")
368
+ with self.assertRaises(Exception) as context:
369
+ model.fit(X, y)
370
+ self.assertIn("`deterministic`", str(context.exception))
371
+
372
+ def test_deterministic_errors(self):
373
+ """Setting deterministic without random_state should error"""
374
+ model = PySRRegressor(deterministic=True)
375
+ X = np.random.randn(100, 2)
376
+ y = np.random.randn(100)
377
+ with self.assertRaises(ValueError):
378
+ model.fit(X, y)
379
+
380
+ def test_scikit_learn_compatibility(self):
381
+ """Test PySRRegressor compatibility with scikit-learn."""
382
+ model = PySRRegressor(
383
+ max_evals=1000,
384
+ verbosity=0,
385
+ progress=False,
386
+ random_state=0,
387
+ deterministic=True,
388
+ procs=0,
389
+ multithreading=False,
390
+ warm_start=False,
391
+ ) # Return early.
392
+
393
+ check_generator = check_estimator(model, generate_only=True)
394
+ exception_messages = []
395
+ for (_, check) in check_generator:
396
+ try:
397
+ with warnings.catch_warnings():
398
+ warnings.simplefilter("ignore")
399
+ # To ensure an equation file is written for each output in
400
+ # nout, set stop condition to niterations=1
401
+ if check.func.__name__ == "check_regressor_multioutput":
402
+ model.set_params(niterations=1, max_evals=None)
403
+ else:
404
+ model.set_params(max_evals=10000)
405
+ check(model)
406
+ print("Passed", check.func.__name__)
407
+ except Exception as e:
408
+ error_message = str(e)
409
+ exception_messages.append(f"{check.func.__name__}: {error_message}\n")
410
+ print("Failed", check.func.__name__, "with:")
411
+ # Add a leading tab to error message, which
412
+ # might be multi-line:
413
+ print("\n".join([(" " * 4) + row for row in error_message.split("\n")]))
414
+ # If any checks failed don't let the test pass.
415
+ self.assertEqual([], exception_messages)
test/test_jax.py CHANGED
@@ -4,8 +4,8 @@ from pysr import sympy2jax, PySRRegressor
4
  import pandas as pd
5
  from jax import numpy as jnp
6
  from jax import random
7
- from jax import grad
8
  import sympy
 
9
 
10
 
11
  class TestJAX(unittest.TestCase):
@@ -21,8 +21,16 @@ class TestJAX(unittest.TestCase):
21
  f, params = sympy2jax(cosx, [x, y, z])
22
  self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
23
 
24
- def test_pipeline(self):
25
- X = np.random.randn(100, 10)
 
 
 
 
 
 
 
 
26
  equations = pd.DataFrame(
27
  {
28
  "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
@@ -35,16 +43,34 @@ class TestJAX(unittest.TestCase):
35
  "equation_file.csv.bkup", sep="|"
36
  )
37
 
38
- model = PySRRegressor(
39
- equation_file="equation_file.csv",
40
- output_jax_format=True,
41
- variable_names="x1 x2 x3".split(" "),
 
 
 
42
  )
43
 
44
- model.selection = [1, 2, 3]
45
- model.n_features = 3
46
- model.using_pandas = False
47
- model.refresh()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  jformat = model.jax()
49
 
50
  np.testing.assert_almost_equal(
@@ -52,3 +78,24 @@ class TestJAX(unittest.TestCase):
52
  np.square(np.cos(X[:, 1])), # Select feature 1
53
  decimal=4,
54
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import pandas as pd
5
  from jax import numpy as jnp
6
  from jax import random
 
7
  import sympy
8
+ from functools import partial
9
 
10
 
11
  class TestJAX(unittest.TestCase):
 
21
  f, params = sympy2jax(cosx, [x, y, z])
22
  self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
23
 
24
+ def test_pipeline_pandas(self):
25
+ X = pd.DataFrame(np.random.randn(100, 10))
26
+ y = np.ones(X.shape[0])
27
+ model = PySRRegressor(
28
+ progress=False,
29
+ max_evals=10000,
30
+ output_jax_format=True,
31
+ )
32
+ model.fit(X, y)
33
+
34
  equations = pd.DataFrame(
35
  {
36
  "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
 
43
  "equation_file.csv.bkup", sep="|"
44
  )
45
 
46
+ model.refresh(checkpoint_file="equation_file.csv")
47
+ jformat = model.jax()
48
+
49
+ np.testing.assert_almost_equal(
50
+ np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
51
+ np.square(np.cos(X.values[:, 1])), # Select feature 1
52
+ decimal=4,
53
  )
54
 
55
+ def test_pipeline(self):
56
+ X = np.random.randn(100, 10)
57
+ y = np.ones(X.shape[0])
58
+ model = PySRRegressor(progress=False, max_evals=10000, output_jax_format=True)
59
+ model.fit(X, y)
60
+
61
+ equations = pd.DataFrame(
62
+ {
63
+ "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
64
+ "MSE": [1.0, 0.1, 1e-5],
65
+ "Complexity": [1, 2, 3],
66
+ }
67
+ )
68
+
69
+ equations["Complexity MSE Equation".split(" ")].to_csv(
70
+ "equation_file.csv.bkup", sep="|"
71
+ )
72
+
73
+ model.refresh(checkpoint_file="equation_file.csv")
74
  jformat = model.jax()
75
 
76
  np.testing.assert_almost_equal(
 
78
  np.square(np.cos(X[:, 1])), # Select feature 1
79
  decimal=4,
80
  )
81
+
82
+ def test_feature_selection(self):
83
+ X = pd.DataFrame({f"k{i}": np.random.randn(1000) for i in range(10, 21)})
84
+ y = X["k15"] ** 2 + np.cos(X["k20"])
85
+
86
+ model = PySRRegressor(
87
+ progress=False,
88
+ unary_operators=["cos"],
89
+ select_k_features=3,
90
+ early_stop_condition=1e-5,
91
+ )
92
+ model.fit(X.values, y.values)
93
+ f, parameters = model.jax().values()
94
+
95
+ np_prediction = model.predict
96
+ jax_prediction = partial(f, parameters=parameters)
97
+
98
+ np_output = np_prediction(X.values)
99
+ jax_output = jax_prediction(X.values)
100
+
101
+ np.testing.assert_almost_equal(np_output, jax_output, decimal=4)
test/test_torch.py CHANGED
@@ -2,7 +2,20 @@ import unittest
2
  import numpy as np
3
  import pandas as pd
4
  from pysr import sympy2torch, PySRRegressor
5
- import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import sympy
7
 
8
 
@@ -13,6 +26,7 @@ class TestTorch(unittest.TestCase):
13
  def test_sympy2torch(self):
14
  x, y, z = sympy.symbols("x y z")
15
  cosx = 1.0 * sympy.cos(x) + y
 
16
  X = torch.tensor(np.random.randn(1000, 3))
17
  true = 1.0 * torch.cos(X[:, 0]) + X[:, 1]
18
  torch_module = sympy2torch(cosx, [x, y, z])
@@ -20,8 +34,18 @@ class TestTorch(unittest.TestCase):
20
  np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
21
  )
22
 
23
- def test_pipeline(self):
24
- X = np.random.randn(100, 10)
 
 
 
 
 
 
 
 
 
 
25
  equations = pd.DataFrame(
26
  {
27
  "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
@@ -34,23 +58,47 @@ class TestTorch(unittest.TestCase):
34
  "equation_file.csv.bkup", sep="|"
35
  )
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  model = PySRRegressor(
 
 
38
  model_selection="accuracy",
39
- equation_file="equation_file.csv",
40
- variable_names="x1 x2 x3".split(" "),
41
- extra_sympy_mappings={},
42
  output_torch_format=True,
43
  )
44
- model.selection = [1, 2, 3]
45
- model.n_features = 2 # TODO: Why is this 2 and not 3?
46
- model.using_pandas = False
47
- model.refresh()
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  tformat = model.pytorch()
50
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
 
51
  np.testing.assert_almost_equal(
52
  tformat(torch.tensor(X)).detach().numpy(),
53
- np.square(np.cos(X[:, 1])), # Selection 1st feature
54
  decimal=4,
55
  )
56
 
@@ -73,6 +121,14 @@ class TestTorch(unittest.TestCase):
73
 
74
  def test_custom_operator(self):
75
  X = np.random.randn(100, 3)
 
 
 
 
 
 
 
 
76
 
77
  equations = pd.DataFrame(
78
  {
@@ -86,18 +142,12 @@ class TestTorch(unittest.TestCase):
86
  "equation_file_custom_operator.csv.bkup", sep="|"
87
  )
88
 
89
- model = PySRRegressor(
90
- model_selection="accuracy",
91
  equation_file="equation_file_custom_operator.csv",
92
- variable_names="x1 x2 x3".split(" "),
93
  extra_sympy_mappings={"mycustomoperator": sympy.sin},
94
  extra_torch_mappings={"mycustomoperator": torch.sin},
95
- output_torch_format=True,
96
  )
97
- model.selection = [0, 1, 2]
98
- model.n_features = 3
99
- model.using_pandas = False
100
- model.refresh()
101
  self.assertEqual(str(model.sympy()), "sin(x1)")
102
  # Will automatically use the set global state from get_hof.
103
 
@@ -105,6 +155,25 @@ class TestTorch(unittest.TestCase):
105
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
106
  np.testing.assert_almost_equal(
107
  tformat(torch.tensor(X)).detach().numpy(),
108
- np.sin(X[:, 0]), # Selection 1st feature
109
  decimal=4,
110
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import numpy as np
3
  import pandas as pd
4
  from pysr import sympy2torch, PySRRegressor
5
+
6
+ # Need to initialize Julia before importing torch...
7
+ import platform
8
+
9
+ if platform.system() == "Darwin":
10
+ # Import PyJulia, then Torch
11
+ from pysr.julia_helpers import init_julia
12
+
13
+ Main = init_julia()
14
+ import torch
15
+ else:
16
+ # Import Torch, then PyJulia
17
+ # https://github.com/pytorch/pytorch/issues/78829
18
+ import torch
19
  import sympy
20
 
21
 
 
26
  def test_sympy2torch(self):
27
  x, y, z = sympy.symbols("x y z")
28
  cosx = 1.0 * sympy.cos(x) + y
29
+
30
  X = torch.tensor(np.random.randn(1000, 3))
31
  true = 1.0 * torch.cos(X[:, 0]) + X[:, 1]
32
  torch_module = sympy2torch(cosx, [x, y, z])
 
34
  np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
35
  )
36
 
37
+ def test_pipeline_pandas(self):
38
+ X = pd.DataFrame(np.random.randn(100, 10))
39
+ y = np.ones(X.shape[0])
40
+ model = PySRRegressor(
41
+ progress=False,
42
+ max_evals=10000,
43
+ model_selection="accuracy",
44
+ extra_sympy_mappings={},
45
+ output_torch_format=True,
46
+ )
47
+ model.fit(X, y)
48
+
49
  equations = pd.DataFrame(
50
  {
51
  "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
 
58
  "equation_file.csv.bkup", sep="|"
59
  )
60
 
61
+ model.refresh(checkpoint_file="equation_file.csv")
62
+ tformat = model.pytorch()
63
+ self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
64
+
65
+ np.testing.assert_almost_equal(
66
+ tformat(torch.tensor(X.values)).detach().numpy(),
67
+ np.square(np.cos(X.values[:, 1])), # Selection 1st feature
68
+ decimal=4,
69
+ )
70
+
71
+ def test_pipeline(self):
72
+ X = np.random.randn(100, 10)
73
+ y = np.ones(X.shape[0])
74
  model = PySRRegressor(
75
+ progress=False,
76
+ max_evals=10000,
77
  model_selection="accuracy",
 
 
 
78
  output_torch_format=True,
79
  )
80
+ model.fit(X, y)
81
+
82
+ equations = pd.DataFrame(
83
+ {
84
+ "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
85
+ "MSE": [1.0, 0.1, 1e-5],
86
+ "Complexity": [1, 2, 3],
87
+ }
88
+ )
89
+
90
+ equations["Complexity MSE Equation".split(" ")].to_csv(
91
+ "equation_file.csv.bkup", sep="|"
92
+ )
93
+
94
+ model.refresh(checkpoint_file="equation_file.csv")
95
 
96
  tformat = model.pytorch()
97
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
98
+
99
  np.testing.assert_almost_equal(
100
  tformat(torch.tensor(X)).detach().numpy(),
101
+ np.square(np.cos(X[:, 1])), # 2nd feature
102
  decimal=4,
103
  )
104
 
 
121
 
122
  def test_custom_operator(self):
123
  X = np.random.randn(100, 3)
124
+ y = np.ones(X.shape[0])
125
+ model = PySRRegressor(
126
+ progress=False,
127
+ max_evals=10000,
128
+ model_selection="accuracy",
129
+ output_torch_format=True,
130
+ )
131
+ model.fit(X, y)
132
 
133
  equations = pd.DataFrame(
134
  {
 
142
  "equation_file_custom_operator.csv.bkup", sep="|"
143
  )
144
 
145
+ model.set_params(
 
146
  equation_file="equation_file_custom_operator.csv",
 
147
  extra_sympy_mappings={"mycustomoperator": sympy.sin},
148
  extra_torch_mappings={"mycustomoperator": torch.sin},
 
149
  )
150
+ model.refresh(checkpoint_file="equation_file_custom_operator.csv")
 
 
 
151
  self.assertEqual(str(model.sympy()), "sin(x1)")
152
  # Will automatically use the set global state from get_hof.
153
 
 
155
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
156
  np.testing.assert_almost_equal(
157
  tformat(torch.tensor(X)).detach().numpy(),
158
+ np.sin(X[:, 1]),
159
  decimal=4,
160
  )
161
+
162
+ def test_feature_selection(self):
163
+ X = pd.DataFrame({f"k{i}": np.random.randn(1000) for i in range(10, 21)})
164
+ y = X["k15"] ** 2 + np.cos(X["k20"])
165
+
166
+ model = PySRRegressor(
167
+ progress=False,
168
+ unary_operators=["cos"],
169
+ select_k_features=3,
170
+ early_stop_condition=1e-5,
171
+ )
172
+ model.fit(X.values, y.values)
173
+ torch_module = model.pytorch()
174
+
175
+ np_output = model.predict(X.values)
176
+
177
+ torch_output = torch_module(torch.tensor(X.values)).detach().numpy()
178
+
179
+ np.testing.assert_almost_equal(np_output, torch_output, decimal=4)