Spaces:

MilesCranmer
/

PySR

Sleeping

App Files Files Community

MilesCranmer committed on Jul 20, 2022

Commit

ccf71e9

1 Parent(s): 93cf05b

`load` function to init model from saved equations

Browse files

Files changed (3) hide show

pysr/__init__.py +1 -0
pysr/sr.py +74 -0
test/test.py +27 -1

pysr/__init__.py CHANGED Viewed

@@ -6,6 +6,7 @@ from .sr import (
     best_tex,
     best_callable,
     best_row,
 )
 from .julia_helpers import install
 from .feynman_problems import Problem, FeynmanProblem

     best_tex,
     best_callable,
     best_row,
+    load,
 )
 from .julia_helpers import install
 from .feynman_problems import Problem, FeynmanProblem

pysr/sr.py CHANGED Viewed

@@ -2034,3 +2034,77 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
         clf, threshold=-np.inf, max_features=select_k_features, prefit=True
     )
     return selector.get_support(indices=True)

         clf, threshold=-np.inf, max_features=select_k_features, prefit=True
     )
     return selector.get_support(indices=True)
+def load(
+    equation_file,
+    *,
+    binary_operators,
+    unary_operators,
+    n_features_in,
+    feature_names_in=None,
+    selection_mask=None,
+    nout=1,
+    **pysr_kwargs,
+):
+    """
+    Create a model from equations stored as a csv file.
+
+    Parameters
+    ----------
+    equation_file : str
+        Path to a csv file containing equations.
+    binary_operators : list[str]
+        The same binary operators used when creating the model.
+        Required keyword argument; forwarded to `PySRRegressor`.
+    unary_operators : list[str]
+        The same unary operators used when creating the model.
+        Required keyword argument; forwarded to `PySRRegressor`.
+    n_features_in : int
+        Number of features passed to the model.
+    feature_names_in : list[str], default=None
+        Names of the features passed to the model. When omitted, the
+        names `x0`, `x1`, ... are generated, one per feature.
+    selection_mask : list[bool], default=None
+        If using select_k_features, you must pass `model.selection_mask_` here.
+        When omitted, an all-True mask of length `n_features_in` is used.
+    nout : int, default=1
+        Number of outputs of the model.
+    pysr_kwargs : dict
+        Any other keyword arguments to initialize the PySRRegressor object.
+
+    Returns
+    -------
+    model : PySRRegressor
+        The model with fitted equations.
+
+    Raises
+    ------
+    ValueError
+        If `feature_names_in` is given and its length differs from
+        `n_features_in`.
+    """
+    # Validate before building the model so bad input fails fast. An explicit
+    # raise is used because `assert` statements are removed under `python -O`.
+    if feature_names_in is not None and len(feature_names_in) != n_features_in:
+        raise ValueError(
+            f"feature_names_in has {len(feature_names_in)} entries, "
+            f"but n_features_in is {n_features_in}"
+        )
+
+    # TODO: copy .bkup file if exists.
+    model = PySRRegressor(
+        equation_file=equation_file,
+        binary_operators=binary_operators,
+        unary_operators=unary_operators,
+        **pysr_kwargs,
+    )
+
+    # Set the trailing-underscore attributes directly, since no fit is run
+    # here, before asking the model to refresh itself.
+    model.equation_file_ = equation_file
+    model.nout_ = nout
+    model.n_features_in_ = n_features_in
+
+    if feature_names_in is None:
+        model.feature_names_in_ = [f"x{i}" for i in range(n_features_in)]
+    else:
+        model.feature_names_in_ = feature_names_in
+
+    if selection_mask is None:
+        # No feature selection was used: keep every input column.
+        model.selection_mask_ = np.ones(n_features_in, dtype=bool)
+    else:
+        model.selection_mask_ = selection_mask
+
+    model.refresh()
+    return model

test/test.py CHANGED Viewed

@@ -4,7 +4,7 @@ import inspect
 import unittest
 import numpy as np
 from sklearn import model_selection
-from pysr import PySRRegressor
 from pysr.sr import run_feature_selection, _handle_feature_selection
 from sklearn.utils.estimator_checks import check_estimator
 import sympy
@@ -280,6 +280,32 @@ class TestPipeline(unittest.TestCase):
         model.fit(X.values, y.values, Xresampled=Xresampled.values)
         self.assertLess(np.average((model.predict(X.values) - y.values) ** 2), 1e-4)
 class TestBest(unittest.TestCase):
     def setUp(self):

 import unittest
 import numpy as np
 from sklearn import model_selection
+from pysr import PySRRegressor, load
 from pysr.sr import run_feature_selection, _handle_feature_selection
 from sklearn.utils.estimator_checks import check_estimator
 import sympy
         model.fit(X.values, y.values, Xresampled=Xresampled.values)
         self.assertLess(np.average((model.predict(X.values) - y.values) ** 2), 1e-4)
+    def test_load_model(self):
+        """See if we can load a previously run model from the equation file."""
+        import os
+        import tempfile
+
+        csv_file_data = """
+        Complexity|MSE|Equation
+        1|0.19951081|1.9762075
+        3|0.12717344|(f0 + 1.4724599)
+        4|0.104823045|pow_abs(2.2683423, cos(f3))"""
+        # Strip the indents:
+        csv_file_data = "\n".join(
+            [line.strip() for line in csv_file_data.split("\n")]
+        )
+
+        # Write into a throwaway directory so the test leaves no stray
+        # equation files behind in the current working directory.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            equation_file = os.path.join(tmpdir, "equation_file.csv")
+            # The same contents are written to the file and its `.bkup` twin.
+            for path in (equation_file, equation_file + ".bkup"):
+                with open(path, "w") as f:
+                    f.write(csv_file_data)
+
+            model = load(
+                equation_file,
+                n_features_in=5,
+                feature_names_in=["f0", "f1", "f2", "f3", "f4"],
+                binary_operators=["+", "*", "/", "-", "^"],
+                unary_operators=["cos"],
+            )
+
+            X = self.rstate.rand(100, 5)
+            # Expected values follow the third equation in the csv above.
+            y_truth = 2.2683423 ** np.cos(X[:, 3])
+            # Index 2 selects that third equation (0-based).
+            y_test = model.predict(X, 2)
+
+        np.testing.assert_allclose(y_truth, y_test)
 class TestBest(unittest.TestCase):
     def setUp(self):