Spaces:

MilesCranmer
/

PySR

Sleeping

App Files Community

MilesCranmer committed on Feb 10, 2024

Commit

70b842a

unverified ·

1 Parent(s): e957e34

Save options to PySRRegressor

Browse files

Files changed (2) hide show

pysr/julia_helpers.py +12 -2
pysr/sr.py +25 -15

pysr/julia_helpers.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """Functions for initializing the Julia environment and installing deps."""
 import warnings
 from juliacall import convert as jl_convert  # type: ignore
 from .julia_import import jl
@@ -8,6 +9,9 @@ from .julia_import import jl
 jl.seval("using Serialization: Serialization")
 jl.seval("using PythonCall: PythonCall")
 def install(*args, **kwargs):
     del args, kwargs
@@ -35,10 +39,16 @@ def jl_array(x):
     return jl_convert(jl.Array, x)
-def jl_deserialize_s(s):
     if s is None:
         return s
     buf = jl.IOBuffer()
     jl.write(buf, jl_array(s))
     jl.seekstart(buf)
-    return jl.Serialization.deserialize(buf)

 """Functions for initializing the Julia environment and installing deps."""
 import warnings
+import numpy as np
 from juliacall import convert as jl_convert  # type: ignore
 from .julia_import import jl
 jl.seval("using Serialization: Serialization")
 jl.seval("using PythonCall: PythonCall")
+Serialization = jl.Serialization
+PythonCall = jl.PythonCall
 def install(*args, **kwargs):
     del args, kwargs
     return jl_convert(jl.Array, x)
+def jl_serialize(obj):
+    buf = jl.IOBuffer()
+    Serialization.serialize(buf, obj)
+    return np.array(jl.take_b(buf))
+def jl_deserialize(s):
     if s is None:
         return s
     buf = jl.IOBuffer()
     jl.write(buf, jl_array(s))
     jl.seekstart(buf)
+    return Serialization.deserialize(buf)

pysr/sr.py CHANGED Viewed

@@ -33,10 +33,12 @@ from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2
 from .export_torch import sympy2torch
 from .feature_selection import run_feature_selection
 from .julia_helpers import (
     _escape_filename,
     _load_cluster_manager,
     jl_array,
-    jl_deserialize_s,
 )
 from .julia_import import SymbolicRegression, jl
 from .utils import (
@@ -602,11 +604,15 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Path to the temporary equations directory.
     equation_file_ : str
         Output equation file name produced by the julia backend.
-    raw_julia_state_stream_ : ndarray
         The serialized state for the julia SymbolicRegression.jl backend (after fitting),
         stored as an array of uint8, produced by Julia's Serialization.serialize function.
-    julia_state_ : ndarray
         The deserialized state.
     equation_file_contents_ : list[pandas.DataFrame]
         Contents of the equation file output by the Julia backend.
     show_pickle_warnings_ : bool
@@ -1053,7 +1059,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         serialization.
         Thus, for `PySRRegressor` to support pickle serialization, the
-        `raw_julia_state_stream_` attribute must be hidden from pickle. This will
         prevent the `warm_start` of any model that is loaded via `pickle.loads()`,
         but does allow all other attributes of a fitted `PySRRegressor` estimator
         to be serialized. Note: Jax and Torch format equations are also removed
@@ -1121,15 +1127,19 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         )
         return self.equations_
     @property
     def julia_state_(self):
-        return jl_deserialize_s(self.raw_julia_state_stream_)
     @property
     def raw_julia_state_(self):
         warnings.warn(
             "PySRRegressor.raw_julia_state_ is now deprecated. "
-            "Please use PySRRegressor.julia_state_ instead, or raw_julia_state_stream_ "
             "for the raw stream of bytes.",
             FutureWarning,
         )
@@ -1675,6 +1685,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             define_helper_functions=False,
         )
         # Convert data to desired precision
         test_X = np.array(X)
         is_complex = np.issubdtype(test_X.dtype, np.complexfloating)
@@ -1718,7 +1730,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         else:
             jl_y_variable_names = None
-        jl.PythonCall.GC.disable()
         out = SymbolicRegression.equation_search(
             jl_X,
             jl_y,
@@ -1741,12 +1753,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             progress=progress and self.verbosity > 0 and len(y.shape) == 1,
             verbosity=int(self.verbosity),
         )
-        jl.PythonCall.GC.enable()
-        # Serialize output (for pickling)
-        buf = jl.IOBuffer()
-        jl.Serialization.serialize(buf, out)
-        self.raw_julia_state_stream_ = np.array(jl.take_b(buf))
         # Set attributes
         self.equations_ = self.get_hof()
@@ -1810,10 +1819,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             Fitted estimator.
         """
         # Init attributes that are not specified in BaseEstimator
-        if self.warm_start and hasattr(self, "raw_julia_state_stream_"):
             pass
         else:
-            if hasattr(self, "raw_julia_state_stream_"):
                 warnings.warn(
                     "The discovered expressions are being reset. "
                     "Please set `warm_start=True` if you wish to continue "
@@ -1823,7 +1832,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             self.equations_ = None
             self.nout_ = 1
             self.selection_mask_ = None
-            self.raw_julia_state_stream_ = None
             self.X_units_ = None
             self.y_units_ = None

 from .export_torch import sympy2torch
 from .feature_selection import run_feature_selection
 from .julia_helpers import (
+    PythonCall,
     _escape_filename,
     _load_cluster_manager,
     jl_array,
+    jl_deserialize,
+    jl_serialize,
 )
 from .julia_import import SymbolicRegression, jl
 from .utils import (
         Path to the temporary equations directory.
     equation_file_ : str
         Output equation file name produced by the julia backend.
+    julia_state_stream_ : ndarray
         The serialized state for the julia SymbolicRegression.jl backend (after fitting),
         stored as an array of uint8, produced by Julia's Serialization.serialize function.
+    julia_state_
         The deserialized state.
+    julia_options_stream_ : ndarray
+        The serialized julia options, stored as an array of uint8.
+    julia_options_
+        The deserialized julia options.
     equation_file_contents_ : list[pandas.DataFrame]
         Contents of the equation file output by the Julia backend.
     show_pickle_warnings_ : bool
         serialization.
         Thus, for `PySRRegressor` to support pickle serialization, the
+        `julia_state_stream_` attribute must be hidden from pickle. This will
         prevent the `warm_start` of any model that is loaded via `pickle.loads()`,
         but does allow all other attributes of a fitted `PySRRegressor` estimator
         to be serialized. Note: Jax and Torch format equations are also removed
         )
         return self.equations_
+    @property
+    def julia_options_(self):
+        return jl_deserialize(self.julia_options_stream_)
     @property
     def julia_state_(self):
+        return jl_deserialize(self.julia_state_stream_)
     @property
     def raw_julia_state_(self):
         warnings.warn(
             "PySRRegressor.raw_julia_state_ is now deprecated. "
+            "Please use PySRRegressor.julia_state_ instead, or julia_state_stream_ "
             "for the raw stream of bytes.",
             FutureWarning,
         )
             define_helper_functions=False,
         )
+        self.julia_options_stream_ = jl_serialize(options)
         # Convert data to desired precision
         test_X = np.array(X)
         is_complex = np.issubdtype(test_X.dtype, np.complexfloating)
         else:
             jl_y_variable_names = None
+        PythonCall.GC.disable()
         out = SymbolicRegression.equation_search(
             jl_X,
             jl_y,
             progress=progress and self.verbosity > 0 and len(y.shape) == 1,
             verbosity=int(self.verbosity),
         )
+        PythonCall.GC.enable()
+        self.julia_state_stream_ = jl_serialize(out)
         # Set attributes
         self.equations_ = self.get_hof()
             Fitted estimator.
         """
         # Init attributes that are not specified in BaseEstimator
+        if self.warm_start and hasattr(self, "julia_state_stream_"):
             pass
         else:
+            if hasattr(self, "julia_state_stream_"):
                 warnings.warn(
                     "The discovered expressions are being reset. "
                     "Please set `warm_start=True` if you wish to continue "
             self.equations_ = None
             self.nout_ = 1
             self.selection_mask_ = None
+            self.julia_state_stream_ = None
+            self.julia_options_stream_ = None
             self.X_units_ = None
             self.y_units_ = None