MilesCranmer committed on
Commit
ed35c4e
1 Parent(s): 1369d9f

Make pipeline tests reproducible

Browse files
Files changed (1) hide show
  1. test/test.py +20 -21
test/test.py CHANGED
@@ -24,8 +24,8 @@ class TestPipeline(unittest.TestCase):
24
  niterations=default_niterations * 2,
25
  populations=default_populations * 2,
26
  )
27
- np.random.seed(0)
28
- self.X = np.random.randn(100, 5)
29
 
30
  def test_linear_relation(self):
31
  y = self.X[:, 0]
@@ -73,7 +73,7 @@ class TestPipeline(unittest.TestCase):
73
 
74
  def test_multioutput_weighted_with_callable_temp_equation(self):
75
  y = self.X[:, [0, 1]] ** 2
76
- w = np.random.rand(*y.shape)
77
  w[w < 0.5] = 0.0
78
  w[w >= 0.5] = 1.0
79
 
@@ -100,7 +100,7 @@ class TestPipeline(unittest.TestCase):
100
  )
101
 
102
  def test_empty_operators_single_input_multirun(self):
103
- X = np.random.randn(100, 1)
104
  y = X[:, 0] + 3.0
105
  regressor = PySRRegressor(
106
  unary_operators=[],
@@ -130,8 +130,7 @@ class TestPipeline(unittest.TestCase):
130
 
131
  def test_noisy(self):
132
 
133
- np.random.seed(1)
134
- y = self.X[:, [0, 1]] ** 2 + np.random.randn(self.X.shape[0], 1) * 0.05
135
  model = PySRRegressor(
136
  # Test that passing a single operator works:
137
  unary_operators="sq(x) = x^2",
@@ -146,26 +145,25 @@ class TestPipeline(unittest.TestCase):
146
  self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
147
 
148
  def test_pandas_resample(self):
149
- np.random.seed(1)
150
  X = pd.DataFrame(
151
  {
152
- "T": np.random.randn(500),
153
- "x": np.random.randn(500),
154
- "unused_feature": np.random.randn(500),
155
  }
156
  )
157
  true_fn = lambda x: np.array(x["T"] + x["x"] ** 2 + 1.323837)
158
  y = true_fn(X)
159
- noise = np.random.randn(500) * 0.01
160
  y = y + noise
161
  # We also test y as a pandas array:
162
  y = pd.Series(y)
163
  # Resampled array is a different order of features:
164
  Xresampled = pd.DataFrame(
165
  {
166
- "unused_feature": np.random.randn(100),
167
- "x": np.random.randn(100),
168
- "T": np.random.randn(100),
169
  }
170
  )
171
  model = PySRRegressor(
@@ -185,9 +183,9 @@ class TestPipeline(unittest.TestCase):
185
  self.assertListEqual(list(sorted(fn._selection)), [0, 1])
186
  X2 = pd.DataFrame(
187
  {
188
- "T": np.random.randn(100),
189
- "unused_feature": np.random.randn(100),
190
- "x": np.random.randn(100),
191
  }
192
  )
193
  self.assertLess(np.average((fn(X2) - true_fn(X2)) ** 2), 1e-1)
@@ -218,6 +216,7 @@ class TestBest(unittest.TestCase):
218
  self.model.n_features = 2
219
  self.model.refresh()
220
  self.equations = self.model.equations
 
221
 
222
  def test_best(self):
223
  self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)
@@ -232,7 +231,7 @@ class TestBest(unittest.TestCase):
232
  self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")
233
 
234
  def test_best_lambda(self):
235
- X = np.random.randn(10, 2)
236
  y = np.cos(X[:, 0]) ** 2
237
  for f in [self.model.predict, self.equations.iloc[-1]["lambda_format"]]:
238
  np.testing.assert_almost_equal(f(X), y, decimal=4)
@@ -240,16 +239,16 @@ class TestBest(unittest.TestCase):
240
 
241
  class TestFeatureSelection(unittest.TestCase):
242
  def setUp(self):
243
- np.random.seed(0)
244
 
245
  def test_feature_selection(self):
246
- X = np.random.randn(20000, 5)
247
  y = X[:, 2] ** 2 + X[:, 3] ** 2
248
  selected = run_feature_selection(X, y, select_k_features=2)
249
  self.assertEqual(sorted(selected), [2, 3])
250
 
251
  def test_feature_selection_handler(self):
252
- X = np.random.randn(20000, 5)
253
  y = X[:, 2] ** 2 + X[:, 3] ** 2
254
  var_names = [f"x{i}" for i in range(5)]
255
  selected_X, selection = _handle_feature_selection(
 
24
  niterations=default_niterations * 2,
25
  populations=default_populations * 2,
26
  )
27
+ self.rstate = np.random.RandomState(0)
28
+ self.X = self.rstate.randn(100, 5)
29
 
30
  def test_linear_relation(self):
31
  y = self.X[:, 0]
 
73
 
74
  def test_multioutput_weighted_with_callable_temp_equation(self):
75
  y = self.X[:, [0, 1]] ** 2
76
+ w = self.rstate.rand(*y.shape)
77
  w[w < 0.5] = 0.0
78
  w[w >= 0.5] = 1.0
79
 
 
100
  )
101
 
102
  def test_empty_operators_single_input_multirun(self):
103
+ X = self.rstate.randn(100, 1)
104
  y = X[:, 0] + 3.0
105
  regressor = PySRRegressor(
106
  unary_operators=[],
 
130
 
131
  def test_noisy(self):
132
 
133
+ y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
 
134
  model = PySRRegressor(
135
  # Test that passing a single operator works:
136
  unary_operators="sq(x) = x^2",
 
145
  self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
146
 
147
  def test_pandas_resample(self):
 
148
  X = pd.DataFrame(
149
  {
150
+ "T": self.rstate.randn(500),
151
+ "x": self.rstate.randn(500),
152
+ "unused_feature": self.rstate.randn(500),
153
  }
154
  )
155
  true_fn = lambda x: np.array(x["T"] + x["x"] ** 2 + 1.323837)
156
  y = true_fn(X)
157
+ noise = self.rstate.randn(500) * 0.01
158
  y = y + noise
159
  # We also test y as a pandas array:
160
  y = pd.Series(y)
161
  # Resampled array is a different order of features:
162
  Xresampled = pd.DataFrame(
163
  {
164
+ "unused_feature": self.rstate.randn(100),
165
+ "x": self.rstate.randn(100),
166
+ "T": self.rstate.randn(100),
167
  }
168
  )
169
  model = PySRRegressor(
 
183
  self.assertListEqual(list(sorted(fn._selection)), [0, 1])
184
  X2 = pd.DataFrame(
185
  {
186
+ "T": self.rstate.randn(100),
187
+ "unused_feature": self.rstate.randn(100),
188
+ "x": self.rstate.randn(100),
189
  }
190
  )
191
  self.assertLess(np.average((fn(X2) - true_fn(X2)) ** 2), 1e-1)
 
216
  self.model.n_features = 2
217
  self.model.refresh()
218
  self.equations = self.model.equations
219
+ self.rstate = np.random.RandomState(0)
220
 
221
  def test_best(self):
222
  self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)
 
231
  self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")
232
 
233
  def test_best_lambda(self):
234
+ X = self.rstate.randn(10, 2)
235
  y = np.cos(X[:, 0]) ** 2
236
  for f in [self.model.predict, self.equations.iloc[-1]["lambda_format"]]:
237
  np.testing.assert_almost_equal(f(X), y, decimal=4)
 
239
 
240
  class TestFeatureSelection(unittest.TestCase):
241
  def setUp(self):
242
+ self.rstate = np.random.RandomState(0)
243
 
244
  def test_feature_selection(self):
245
+ X = self.rstate.randn(20000, 5)
246
  y = X[:, 2] ** 2 + X[:, 3] ** 2
247
  selected = run_feature_selection(X, y, select_k_features=2)
248
  self.assertEqual(sorted(selected), [2, 3])
249
 
250
  def test_feature_selection_handler(self):
251
+ X = self.rstate.randn(20000, 5)
252
  y = X[:, 2] ** 2 + X[:, 3] ** 2
253
  var_names = [f"x{i}" for i in range(5)]
254
  selected_X, selection = _handle_feature_selection(