Spaces:
Sleeping
Sleeping
MilesCranmer
committed on
Merge pull request #281 from MilesCranmer/complex-numbers
Browse files
- docs/examples.md +35 -1
- pysr/__init__.py +1 -0
- pysr/sklearn_monkeypatch.py +13 -0
- pysr/sr.py +28 -1
- pysr/test/test.py +15 -2
- pysr/version.py +2 -2
docs/examples.md
CHANGED
@@ -284,7 +284,41 @@ You can get the sympy version of the best equation with:
|
|
284 |
model.sympy()
|
285 |
```
|
286 |
|
287 |
-
## 8.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
|
289 |
For the many other features available in PySR, please
|
290 |
read the [Options section](options.md).
|
|
|
284 |
model.sympy()
|
285 |
```
|
286 |
|
287 |
+
## 8. Complex numbers
|
288 |
+
|
289 |
+
PySR can also search for complex-valued expressions. Simply pass
|
290 |
+
data with a complex datatype (e.g., `np.complex128`),
|
291 |
+
and PySR will automatically search for complex-valued expressions:
|
292 |
+
|
293 |
+
```python
|
294 |
+
import numpy as np
|
295 |
+
|
296 |
+
X = np.random.randn(100, 1) + 1j * np.random.randn(100, 1)
|
297 |
+
y = (1 + 2j) * np.cos(X[:, 0] * (0.5 - 0.2j))
|
298 |
+
|
299 |
+
model = PySRRegressor(
|
300 |
+
binary_operators=["+", "-", "*"], unary_operators=["cos"], niterations=100,
|
301 |
+
)
|
302 |
+
|
303 |
+
model.fit(X, y)
|
304 |
+
```
|
305 |
+
|
306 |
+
You can see that all of the learned constants are now complex numbers.
|
307 |
+
We can get the sympy version of the best equation with:
|
308 |
+
|
309 |
+
```python
|
310 |
+
model.sympy()
|
311 |
+
```
|
312 |
+
|
313 |
+
We can also make predictions normally, by passing complex data:
|
314 |
+
|
315 |
+
```python
|
316 |
+
model.predict(X, -1)
|
317 |
+
```
|
318 |
+
|
319 |
+
to make predictions with the most accurate expression.
|
320 |
+
|
321 |
+
## 9. Additional features
|
322 |
|
323 |
For the many other features available in PySR, please
|
324 |
read the [Options section](options.md).
|
pysr/__init__.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from .version import __version__
|
2 |
from .sr import (
|
3 |
pysr,
|
|
|
1 |
+
from . import sklearn_monkeypatch
|
2 |
from .version import __version__
|
3 |
from .sr import (
|
4 |
pysr,
|
pysr/sklearn_monkeypatch.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Here, we monkey patch scikit-learn until this
|
2 |
+
# issue is fixed: https://github.com/scikit-learn/scikit-learn/issues/25922
|
3 |
+
from sklearn.utils import validation
|
4 |
+
|
5 |
+
|
6 |
+
def _ensure_no_complex_data(*args, **kwargs):
|
7 |
+
...
|
8 |
+
|
9 |
+
|
10 |
+
try:
|
11 |
+
validation._ensure_no_complex_data = _ensure_no_complex_data
|
12 |
+
except AttributeError:
|
13 |
+
...
|
pysr/sr.py
CHANGED
@@ -498,6 +498,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
498 |
What precision to use for the data. By default this is `32`
|
499 |
(float32), but you can select `64` or `16` as well, giving
|
500 |
you 64 or 16 bits of floating point precision, respectively.
|
|
|
|
|
501 |
Default is `32`.
|
502 |
random_state : int, Numpy RandomState instance or None
|
503 |
Pass an int for reproducible results across multiple function calls.
|
@@ -1619,7 +1621,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1619 |
)
|
1620 |
|
1621 |
# Convert data to desired precision
|
1622 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1623 |
|
1624 |
# This converts the data into a Julia array:
|
1625 |
Main.X = np.array(X, dtype=np_dtype).T
|
@@ -2007,6 +2015,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
2007 |
|
2008 |
def _read_equation_file(self):
|
2009 |
"""Read the hall of fame file created by `SymbolicRegression.jl`."""
|
|
|
2010 |
try:
|
2011 |
if self.nout_ > 1:
|
2012 |
all_outputs = []
|
@@ -2024,6 +2033,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
2024 |
},
|
2025 |
inplace=True,
|
2026 |
)
|
|
|
2027 |
|
2028 |
all_outputs.append(df)
|
2029 |
else:
|
@@ -2039,6 +2049,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
2039 |
},
|
2040 |
inplace=True,
|
2041 |
)
|
|
|
|
|
|
|
|
|
2042 |
except FileNotFoundError:
|
2043 |
raise RuntimeError(
|
2044 |
"Couldn't find equation file! The equation search likely exited "
|
@@ -2329,3 +2343,16 @@ def _csv_filename_to_pkl_filename(csv_filename) -> str:
|
|
2329 |
pkl_basename = base + ".pkl"
|
2330 |
|
2331 |
return os.path.join(dirname, pkl_basename)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
498 |
What precision to use for the data. By default this is `32`
|
499 |
(float32), but you can select `64` or `16` as well, giving
|
500 |
you 64 or 16 bits of floating point precision, respectively.
|
501 |
+
If you pass complex data, the corresponding complex precision
|
502 |
+
will be used (i.e., `64` for complex128, `32` for complex64).
|
503 |
Default is `32`.
|
504 |
random_state : int, Numpy RandomState instance or None
|
505 |
Pass an int for reproducible results across multiple function calls.
|
|
|
1621 |
)
|
1622 |
|
1623 |
# Convert data to desired precision
|
1624 |
+
test_X = np.array(X)
|
1625 |
+
is_complex = np.issubdtype(test_X.dtype, np.complexfloating)
|
1626 |
+
is_real = not is_complex
|
1627 |
+
if is_real:
|
1628 |
+
np_dtype = {16: np.float16, 32: np.float32, 64: np.float64}[self.precision]
|
1629 |
+
else:
|
1630 |
+
np_dtype = {32: np.complex64, 64: np.complex128}[self.precision]
|
1631 |
|
1632 |
# This converts the data into a Julia array:
|
1633 |
Main.X = np.array(X, dtype=np_dtype).T
|
|
|
2015 |
|
2016 |
def _read_equation_file(self):
|
2017 |
"""Read the hall of fame file created by `SymbolicRegression.jl`."""
|
2018 |
+
|
2019 |
try:
|
2020 |
if self.nout_ > 1:
|
2021 |
all_outputs = []
|
|
|
2033 |
},
|
2034 |
inplace=True,
|
2035 |
)
|
2036 |
+
df["equation"] = df["equation"].apply(_preprocess_julia_floats)
|
2037 |
|
2038 |
all_outputs.append(df)
|
2039 |
else:
|
|
|
2049 |
},
|
2050 |
inplace=True,
|
2051 |
)
|
2052 |
+
all_outputs[-1]["equation"] = all_outputs[-1]["equation"].apply(
|
2053 |
+
_preprocess_julia_floats
|
2054 |
+
)
|
2055 |
+
|
2056 |
except FileNotFoundError:
|
2057 |
raise RuntimeError(
|
2058 |
"Couldn't find equation file! The equation search likely exited "
|
|
|
2343 |
pkl_basename = base + ".pkl"
|
2344 |
|
2345 |
return os.path.join(dirname, pkl_basename)
|
2346 |
+
|
2347 |
+
|
2348 |
+
_regexp_im = re.compile(r"\b(\d+\.\d+)im\b")
|
2349 |
+
_regexp_im_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)im\b")
|
2350 |
+
_regexp_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)\b")
|
2351 |
+
|
2352 |
+
_apply_regexp_im = lambda x: _regexp_im.sub(r"\1j", x)
|
2353 |
+
_apply_regexp_im_sci = lambda x: _regexp_im_sci.sub(r"\1e\2j", x)
|
2354 |
+
_apply_regexp_sci = lambda x: _regexp_sci.sub(r"\1e\2", x)
|
2355 |
+
|
2356 |
+
|
2357 |
+
def _preprocess_julia_floats(s: str) -> str:
|
2358 |
+
return _apply_regexp_sci(_apply_regexp_im_sci(_apply_regexp_im(s)))
|
pysr/test/test.py
CHANGED
@@ -181,6 +181,20 @@ class TestPipeline(unittest.TestCase):
|
|
181 |
print("Model equations: ", model.sympy()[1])
|
182 |
print("True equation: x1^2")
|
183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
def test_empty_operators_single_input_warm_start(self):
|
185 |
X = self.rstate.randn(100, 1)
|
186 |
y = X[:, 0] + 3.0
|
@@ -230,7 +244,6 @@ class TestPipeline(unittest.TestCase):
|
|
230 |
regressor.fit(self.X, y)
|
231 |
|
232 |
def test_noisy(self):
|
233 |
-
|
234 |
y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
|
235 |
model = PySRRegressor(
|
236 |
# Test that passing a single operator works:
|
@@ -664,7 +677,7 @@ class TestMiscellaneous(unittest.TestCase):
|
|
664 |
|
665 |
check_generator = check_estimator(model, generate_only=True)
|
666 |
exception_messages = []
|
667 |
-
for
|
668 |
try:
|
669 |
with warnings.catch_warnings():
|
670 |
warnings.simplefilter("ignore")
|
|
|
181 |
print("Model equations: ", model.sympy()[1])
|
182 |
print("True equation: x1^2")
|
183 |
|
184 |
+
def test_complex_equations_anonymous_stop(self):
|
185 |
+
X = self.rstate.randn(100, 3) + 1j * self.rstate.randn(100, 3)
|
186 |
+
y = (2 + 1j) * np.cos(X[:, 0] * (0.5 - 0.3j))
|
187 |
+
model = PySRRegressor(
|
188 |
+
binary_operators=["+", "-", "*"],
|
189 |
+
unary_operators=["cos"],
|
190 |
+
**self.default_test_kwargs,
|
191 |
+
early_stop_condition="(loss, complexity) -> loss <= 1e-4 && complexity <= 6",
|
192 |
+
)
|
193 |
+
model.fit(X, y)
|
194 |
+
test_y = model.predict(X)
|
195 |
+
self.assertTrue(np.issubdtype(test_y.dtype, np.complexfloating))
|
196 |
+
self.assertLessEqual(np.average(np.abs(test_y - y) ** 2), 1e-4)
|
197 |
+
|
198 |
def test_empty_operators_single_input_warm_start(self):
|
199 |
X = self.rstate.randn(100, 1)
|
200 |
y = X[:, 0] + 3.0
|
|
|
244 |
regressor.fit(self.X, y)
|
245 |
|
246 |
def test_noisy(self):
|
|
|
247 |
y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
|
248 |
model = PySRRegressor(
|
249 |
# Test that passing a single operator works:
|
|
|
677 |
|
678 |
check_generator = check_estimator(model, generate_only=True)
|
679 |
exception_messages = []
|
680 |
+
for _, check in check_generator:
|
681 |
try:
|
682 |
with warnings.catch_warnings():
|
683 |
warnings.simplefilter("ignore")
|
pysr/version.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
__version__ = "0.
|
2 |
-
__symbolic_regression_jl_version__ = "0.
|
|
|
1 |
+
__version__ = "0.12.0"
|
2 |
+
__symbolic_regression_jl_version__ = "0.16.1"
|