Spaces:

MilesCranmer
/

PySR

Running

App Files Community

MilesCranmer committed on Aug 4, 2022

Commit

b8a97f1

1 Parent(s): b53e7fa

Use .pkl instead of .csv.pkl

Browse files

Files changed (2) hide show

pysr/sr.py +28 -10
test/test.py +20 -1

pysr/sr.py CHANGED Viewed

@@ -930,7 +930,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         This should only be used internally by PySRRegressor."""
         # Save model state:
         self.show_pickle_warnings_ = False
-        with open(str(self.equation_file_) + ".pkl", "wb") as f:
             pkl.dump(self, f)
         self.show_pickle_warnings_ = True
@@ -1636,14 +1636,16 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         # Initially, just save model parameters, so that
         # it can be loaded from an early exit:
-        self._checkpoint()
         # Perform the search:
         self._run(X, y, mutated_params, weights=weights, seed=seed)
         # Then, after fit, we save again, so the pickle file contains
         # the equations:
-        self._checkpoint()
         return self
@@ -2077,6 +2079,17 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
     return selector.get_support(indices=True)
 def load(
     equation_file,
     *,
@@ -2094,7 +2107,8 @@ def load(
     Parameters
     ----------
     equation_file : str
-        Path to a csv file containing equations.
     binary_operators : list[str], default=["+", "-", "*", "/"]
         The same binary operators used when creating the model.
@@ -2123,14 +2137,19 @@ def load(
     model : PySRRegressor
         The model with fitted equations.
     """
     # Try to load model from <equation_file>.pkl
-    print(f"Checking if {equation_file}.pkl exists...")
-    if os.path.exists(str(equation_file) + ".pkl"):
-        print(f"Loading model from {equation_file}.pkl.")
         assert binary_operators is None
         assert unary_operators is None
         assert n_features_in is None
-        with open(str(equation_file) + ".pkl", "rb") as f:
             model = pkl.load(f)
         # Update any parameters if necessary, such as
         # extra_sympy_mappings:
@@ -2142,8 +2161,7 @@ def load(
     # Else, we re-create it.
     print(
-        f"{equation_file}.pkl does not exist, "
-        "so we must create the model from scratch."
     )
     assert binary_operators is not None
     assert unary_operators is not None

         This should only be used internally by PySRRegressor."""
         # Save model state:
         self.show_pickle_warnings_ = False
+        with open(_csv_filename_to_pkl_filename(self.equation_file_), "wb") as f:
             pkl.dump(self, f)
         self.show_pickle_warnings_ = True
         # Initially, just save model parameters, so that
         # it can be loaded from an early exit:
+        if not self.temp_equation_file:
+            self._checkpoint()
         # Perform the search:
         self._run(X, y, mutated_params, weights=weights, seed=seed)
         # Then, after fit, we save again, so the pickle file contains
         # the equations:
+        if not self.temp_equation_file:
+            self._checkpoint()
         return self
     return selector.get_support(indices=True)
+def _csv_filename_to_pkl_filename(csv_filename) -> str:
+    # Assume that the csv filename is of the form "foo.csv"
+    dirname = str(os.path.dirname(csv_filename))
+    basename = str(os.path.basename(csv_filename))
+    base = str(os.path.splitext(basename)[0])
+    pkl_basename = base + ".pkl"
+    return os.path.join(dirname, pkl_basename)
 def load(
     equation_file,
     *,
     Parameters
     ----------
     equation_file : str
+        Path to a csv file containing equations, or a pickle file
+        containing the model.
     binary_operators : list[str], default=["+", "-", "*", "/"]
         The same binary operators used when creating the model.
     model : PySRRegressor
         The model with fitted equations.
     """
+    if os.path.splitext(equation_file)[1] != ".pkl":
+        pkl_filename = _csv_filename_to_pkl_filename(equation_file)
+    else:
+        pkl_filename = equation_file
     # Try to load model from <equation_file>.pkl
+    print(f"Checking if {pkl_filename} exists...")
+    if os.path.exists(pkl_filename):
+        print(f"Loading model from {pkl_filename}")
         assert binary_operators is None
         assert unary_operators is None
         assert n_features_in is None
+        with open(pkl_filename, "rb") as f:
             model = pkl.load(f)
         # Update any parameters if necessary, such as
         # extra_sympy_mappings:
     # Else, we re-create it.
     print(
+        f"{equation_file} does not exist, " "so we must create the model from scratch."
     )
     assert binary_operators is not None
     assert unary_operators is not None

test/test.py CHANGED Viewed

@@ -5,7 +5,11 @@ import unittest
 import numpy as np
 from sklearn import model_selection
 from pysr import PySRRegressor, load
-from pysr.sr import run_feature_selection, _handle_feature_selection
 from sklearn.utils.estimator_checks import check_estimator
 import sympy
 import pandas as pd
@@ -341,6 +345,7 @@ class TestPipeline(unittest.TestCase):
             if os.path.exists(file_to_delete):
                 os.remove(file_to_delete)
         model3 = load(
             model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
         )
@@ -430,6 +435,20 @@ class TestFeatureSelection(unittest.TestCase):
 class TestMiscellaneous(unittest.TestCase):
     """Test miscellaneous functions."""
     def test_deprecation(self):
         """Ensure that deprecation works as expected.

 import numpy as np
 from sklearn import model_selection
 from pysr import PySRRegressor, load
+from pysr.sr import (
+    run_feature_selection,
+    _handle_feature_selection,
+    _csv_filename_to_pkl_filename,
+)
 from sklearn.utils.estimator_checks import check_estimator
 import sympy
 import pandas as pd
             if os.path.exists(file_to_delete):
                 os.remove(file_to_delete)
+        pickle_file = rand_dir / "equations.pkl"
         model3 = load(
             model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
         )
 class TestMiscellaneous(unittest.TestCase):
     """Test miscellaneous functions."""
+    def test_csv_to_pkl_conversion(self):
+        """Test that csv filename to pkl filename works as expected."""
+        tmpdir = Path(tempfile.mkdtemp())
+        equation_file = tmpdir / "equations.389479384.28378374.csv"
+        expected_pkl_file = tmpdir / "equations.389479384.28378374.pkl"
+        # First, test inputting the paths:
+        test_pkl_file = _csv_filename_to_pkl_filename(equation_file)
+        self.assertEqual(test_pkl_file, str(expected_pkl_file))
+        # Next, test inputting the strings.
+        test_pkl_file = _csv_filename_to_pkl_filename(str(equation_file))
+        self.assertEqual(test_pkl_file, str(expected_pkl_file))
     def test_deprecation(self):
         """Ensure that deprecation works as expected.