MilesCranmer committed on
Commit
03d5a42
1 Parent(s): 3c4243b

Attempt to fix unit tests of equation file

Browse files
Files changed (2) hide show
  1. pysr/sr.py +26 -13
  2. test/test.py +12 -4
pysr/sr.py CHANGED
@@ -559,6 +559,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
559
  raw_julia_state_ : tuple[list[PyCall.jlwrap], PyCall.jlwrap]
560
  The state for the julia SymbolicRegression.jl backend post fitting.
561
 
 
 
 
562
  Notes
563
  -----
564
  Most default parameters have been tuned over several example equations,
@@ -959,6 +962,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
959
  self.equation_file_ = "hall_of_fame_" + date_time + ".csv"
960
  else:
961
  self.equation_file_ = self.equation_file
 
962
 
963
  def _validate_and_set_init_params(self):
964
  """
@@ -1599,6 +1603,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1599
  check_is_fitted(self, attributes=["equation_file_"])
1600
  if checkpoint_file:
1601
  self.equation_file_ = checkpoint_file
 
1602
  self.equations_ = self.get_hof()
1603
 
1604
  def predict(self, X, index=None):
@@ -1771,18 +1776,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1771
  return [eq["torch_format"] for eq in best_equation]
1772
  return best_equation["torch_format"]
1773
 
1774
- def get_hof(self):
1775
- """Get the equations from a hall of fame file. If no arguments
1776
- entered, the ones used previously from a call to PySR will be used."""
1777
- check_is_fitted(
1778
- self,
1779
- attributes=[
1780
- "nout_",
1781
- "equation_file_",
1782
- "selection_mask_",
1783
- "feature_names_in_",
1784
- ],
1785
- )
1786
  try:
1787
  if self.nout_ > 1:
1788
  all_outputs = []
@@ -1817,6 +1812,24 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1817
  "Couldn't find equation file! The equation search likely exited "
1818
  "before a single iteration completed."
1819
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1820
 
1821
  # It is expected extra_jax/torch_mappings will be updated after fit.
1822
  # Thus, validation is performed here instead of in _validate_init_params
@@ -1843,7 +1856,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1843
 
1844
  ret_outputs = []
1845
 
1846
- for output in all_outputs:
1847
 
1848
  scores = []
1849
  lastMSE = None
 
559
  raw_julia_state_ : tuple[list[PyCall.jlwrap], PyCall.jlwrap]
560
  The state for the julia SymbolicRegression.jl backend post fitting.
561
 
562
+ equation_file_contents_ : list[pandas.DataFrame]
563
+ Contents of the equation file output by the Julia backend.
564
+
565
  Notes
566
  -----
567
  Most default parameters have been tuned over several example equations,
 
962
  self.equation_file_ = "hall_of_fame_" + date_time + ".csv"
963
  else:
964
  self.equation_file_ = self.equation_file
965
+ self.equation_file_contents_ = None
966
 
967
  def _validate_and_set_init_params(self):
968
  """
 
1603
  check_is_fitted(self, attributes=["equation_file_"])
1604
  if checkpoint_file:
1605
  self.equation_file_ = checkpoint_file
1606
+ self.equation_file_contents_ = None
1607
  self.equations_ = self.get_hof()
1608
 
1609
  def predict(self, X, index=None):
 
1776
  return [eq["torch_format"] for eq in best_equation]
1777
  return best_equation["torch_format"]
1778
 
1779
+ def _read_equation_file(self):
1780
+ """Read the hall of fame file created by SymbolicRegression.jl"""
 
 
 
 
 
 
 
 
 
 
1781
  try:
1782
  if self.nout_ > 1:
1783
  all_outputs = []
 
1812
  "Couldn't find equation file! The equation search likely exited "
1813
  "before a single iteration completed."
1814
  )
1815
+ return all_outputs
1816
+
1817
+ def get_hof(self):
1818
+ """Get the equations from a hall of fame file. If no arguments
1819
+ entered, the ones used previously from a call to PySR will be used."""
1820
+ check_is_fitted(
1821
+ self,
1822
+ attributes=[
1823
+ "nout_",
1824
+ "equation_file_",
1825
+ "selection_mask_",
1826
+ "feature_names_in_",
1827
+ ],
1828
+ )
1829
+ if (
1830
+ not hasattr(self, "equation_file_contents_")
1831
+ ) or self.equation_file_contents_ is None:
1832
+ self.equation_file_contents_ = self._read_equation_file()
1833
 
1834
  # It is expected extra_jax/torch_mappings will be updated after fit.
1835
  # Thus, validation is performed here instead of in _validate_init_params
 
1856
 
1857
  ret_outputs = []
1858
 
1859
+ for output in self.equation_file_contents_:
1860
 
1861
  scores = []
1862
  lastMSE = None
test/test.py CHANGED
@@ -115,7 +115,6 @@ class TestPipeline(unittest.TestCase):
115
  extra_sympy_mappings={"sq": lambda x: x**2},
116
  **self.default_test_kwargs,
117
  procs=0,
118
- temp_equation_file=True,
119
  delete_tempfiles=False,
120
  early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 2",
121
  )
@@ -158,8 +157,13 @@ class TestPipeline(unittest.TestCase):
158
  np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
159
 
160
  # Test if repeated fit works:
161
- regressor.set_params(niterations=0, warm_start=True, early_stop_condition=None)
162
- # This should exit immediately, and use the old equations
 
 
 
 
 
163
  regressor.fit(X, y)
164
 
165
  self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
@@ -272,7 +276,6 @@ class TestBest(unittest.TestCase):
272
  model_selection="accuracy",
273
  equation_file="equation_file.csv",
274
  )
275
- self.model.fit(self.X, self.y)
276
  equations = pd.DataFrame(
277
  {
278
  "equation": ["1.0", "cos(x0)", "square(cos(x0))"],
@@ -281,6 +284,11 @@ class TestBest(unittest.TestCase):
281
  }
282
  )
283
 
 
 
 
 
 
284
  equations["complexity loss equation".split(" ")].to_csv(
285
  "equation_file.csv.bkup", sep="|"
286
  )
 
115
  extra_sympy_mappings={"sq": lambda x: x**2},
116
  **self.default_test_kwargs,
117
  procs=0,
 
118
  delete_tempfiles=False,
119
  early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 2",
120
  )
 
157
  np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
158
 
159
  # Test if repeated fit works:
160
+ regressor.set_params(
161
+ niterations=1,
162
+ ncyclesperiteration=2,
163
+ warm_start=True,
164
+ early_stop_condition=None,
165
+ )
166
+ # This should exit almost immediately, and use the old equations
167
  regressor.fit(X, y)
168
 
169
  self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
 
276
  model_selection="accuracy",
277
  equation_file="equation_file.csv",
278
  )
 
279
  equations = pd.DataFrame(
280
  {
281
  "equation": ["1.0", "cos(x0)", "square(cos(x0))"],
 
284
  }
285
  )
286
 
287
+ # Set up internal parameters as if it had been fitted:
288
+ self.model.equation_file_ = "equation_file.csv"
289
+ self.model.nout_ = 1
290
+ self.model.selection_mask_ = None
291
+ self.model.feature_names_in_ = np.array(["x0", "x1"], dtype=object)
292
  equations["complexity loss equation".split(" ")].to_csv(
293
  "equation_file.csv.bkup", sep="|"
294
  )