MilesCranmer committed on
Commit
ccf71e9
·
1 Parent(s): 93cf05b

`load` function to init model from saved equations

Browse files
Files changed (3) hide show
  1. pysr/__init__.py +1 -0
  2. pysr/sr.py +74 -0
  3. test/test.py +27 -1
pysr/__init__.py CHANGED
@@ -6,6 +6,7 @@ from .sr import (
6
  best_tex,
7
  best_callable,
8
  best_row,
 
9
  )
10
  from .julia_helpers import install
11
  from .feynman_problems import Problem, FeynmanProblem
 
6
  best_tex,
7
  best_callable,
8
  best_row,
9
+ load,
10
  )
11
  from .julia_helpers import install
12
  from .feynman_problems import Problem, FeynmanProblem
pysr/sr.py CHANGED
@@ -2034,3 +2034,77 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
2034
  clf, threshold=-np.inf, max_features=select_k_features, prefit=True
2035
  )
2036
  return selector.get_support(indices=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2034
  clf, threshold=-np.inf, max_features=select_k_features, prefit=True
2035
  )
2036
  return selector.get_support(indices=True)
2037
+
2038
+
2039
def load(
    equation_file,
    *,
    binary_operators,
    unary_operators,
    n_features_in,
    feature_names_in=None,
    selection_mask=None,
    nout=1,
    **pysr_kwargs,
):
    """
    Create a model from equations stored as a csv file.

    Parameters
    ----------
    equation_file : str
        Path to a csv file containing equations.

    binary_operators : list[str]
        The same binary operators used when creating the model.
        Required keyword argument (no default).

    unary_operators : list[str]
        The same unary operators used when creating the model.
        Required keyword argument (no default).

    n_features_in : int
        Number of features passed to the model.

    feature_names_in : list[str], default=None
        Names of the features passed to the model. When omitted, the
        names `x0`, `x1`, ... are generated, one per feature.

    selection_mask : list[bool], default=None
        If using select_k_features, you must pass `model.selection_mask_` here.
        When omitted, a boolean mask of length `n_features_in` with every
        entry set to True is used.

    nout : int, default=1
        Number of outputs of the model.

    pysr_kwargs : dict
        Any other keyword arguments to initialize the PySRRegressor object.

    Returns
    -------
    model : PySRRegressor
        The model with fitted equations.

    Raises
    ------
    ValueError
        If `feature_names_in` is given and its length differs from
        `n_features_in`.
    """

    # TODO: copy .bkup file if exists.
    model = PySRRegressor(
        equation_file=equation_file,
        binary_operators=binary_operators,
        unary_operators=unary_operators,
        **pysr_kwargs,
    )

    # Set the trailing-underscore attributes by hand, since no fit is run here.
    model.equation_file_ = equation_file
    model.nout_ = nout
    model.n_features_in_ = n_features_in

    if feature_names_in is None:
        model.feature_names_in_ = [f"x{i}" for i in range(n_features_in)]
    else:
        # Explicit check instead of `assert`, which is removed under `python -O`.
        if len(feature_names_in) != n_features_in:
            raise ValueError(
                f"`feature_names_in` has {len(feature_names_in)} entries, "
                f"but `n_features_in` is {n_features_in}."
            )
        model.feature_names_in_ = feature_names_in

    if selection_mask is None:
        model.selection_mask_ = np.ones(n_features_in, dtype=bool)
    else:
        model.selection_mask_ = selection_mask

    # NOTE(review): presumably this reads the equations back from
    # `equation_file` -- confirm against PySRRegressor.refresh.
    model.refresh()

    return model
test/test.py CHANGED
@@ -4,7 +4,7 @@ import inspect
4
  import unittest
5
  import numpy as np
6
  from sklearn import model_selection
7
- from pysr import PySRRegressor
8
  from pysr.sr import run_feature_selection, _handle_feature_selection
9
  from sklearn.utils.estimator_checks import check_estimator
10
  import sympy
@@ -280,6 +280,32 @@ class TestPipeline(unittest.TestCase):
280
  model.fit(X.values, y.values, Xresampled=Xresampled.values)
281
  self.assertLess(np.average((model.predict(X.values) - y.values) ** 2), 1e-4)
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
  class TestBest(unittest.TestCase):
285
  def setUp(self):
 
4
  import unittest
5
  import numpy as np
6
  from sklearn import model_selection
7
+ from pysr import PySRRegressor, load
8
  from pysr.sr import run_feature_selection, _handle_feature_selection
9
  from sklearn.utils.estimator_checks import check_estimator
10
  import sympy
 
280
  model.fit(X.values, y.values, Xresampled=Xresampled.values)
281
  self.assertLess(np.average((model.predict(X.values) - y.values) ** 2), 1e-4)
282
 
283
def test_load_model(self):
    """See if we can load a previously run model from the equation file."""
    # Local imports: only this test needs them.
    import os
    import tempfile

    csv_file_data = """
    Complexity|MSE|Equation
    1|0.19951081|1.9762075
    3|0.12717344|(f0 + 1.4724599)
    4|0.104823045|pow_abs(2.2683423, cos(f3))"""
    # Strip the indents:
    csv_file_data = "\n".join(line.strip() for line in csv_file_data.split("\n"))

    # Write into a temporary directory so the test leaves no stray files
    # behind in the current working directory.
    with tempfile.TemporaryDirectory() as tmpdir:
        equation_file = os.path.join(tmpdir, "equation_file.csv")
        # NOTE(review): the `.bkup` copy is written alongside the csv,
        # presumably because the loader reads it -- confirm.
        for path in (equation_file, equation_file + ".bkup"):
            with open(path, "w") as f:
                f.write(csv_file_data)

        model = load(
            equation_file,
            n_features_in=5,
            feature_names_in=["f0", "f1", "f2", "f3", "f4"],
            binary_operators=["+", "*", "/", "-", "^"],
            unary_operators=["cos"],
        )
        X = self.rstate.rand(100, 5)
        # Row index 2 of the csv is `pow_abs(2.2683423, cos(f3))`.
        y_truth = 2.2683423 ** np.cos(X[:, 3])
        # Predict while the files still exist, in case the model re-reads them.
        y_test = model.predict(X, 2)

    np.testing.assert_allclose(y_truth, y_test)
308
+
309
 
310
  class TestBest(unittest.TestCase):
311
  def setUp(self):