MilesCranmer committed on
Commit
50f37a0
1 Parent(s): 9556e73

Add nested_constraints feature

Browse files
Files changed (2) hide show
  1. pysr/sr.py +25 -0
  2. test/test.py +2 -1
pysr/sr.py CHANGED
@@ -391,6 +391,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
391
  select_k_features=None,
392
  warmup_maxsize_by=0.0,
393
  constraints=None,
 
394
  use_frequency=True,
395
  use_frequency_in_tournament=True,
396
  tempdir=None,
@@ -511,6 +512,16 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
511
  :type warmup_maxsize_by: float
512
  :param constraints: dictionary of int (unary) or 2-tuples (binary), this enforces maxsize constraints on the individual arguments of operators. E.g., `'pow': (-1, 1)` says that power laws can have any complexity left argument, but only 1 complexity exponent. Use this to force more interpretable solutions.
513
  :type constraints: dict
 
 
 
 
 
 
 
 
 
 
514
  :param use_frequency: whether to measure the frequency of complexities, and use that instead of parsimony to explore equation space. Will naturally find equations of all complexities.
515
  :type use_frequency: bool
516
  :param use_frequency_in_tournament: whether to use the frequency mentioned above in the tournament, rather than just the simulated annealing.
@@ -706,6 +717,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
706
  select_k_features=select_k_features,
707
  warmup_maxsize_by=warmup_maxsize_by,
708
  constraints=constraints,
 
709
  use_frequency=use_frequency,
710
  use_frequency_in_tournament=use_frequency_in_tournament,
711
  tempdir=tempdir,
@@ -1152,6 +1164,18 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
1152
 
1153
  una_constraints = [constraints[op] for op in unary_operators]
1154
  bin_constraints = [constraints[op] for op in binary_operators]
 
 
 
 
 
 
 
 
 
 
 
 
1155
 
1156
  if not already_ran:
1157
  Main.eval("using Pkg")
@@ -1233,6 +1257,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
1233
  unary_operators=Main.eval(str(tuple(unary_operators)).replace("'", "")),
1234
  bin_constraints=bin_constraints,
1235
  una_constraints=una_constraints,
 
1236
  loss=Main.custom_loss,
1237
  maxsize=int(maxsize),
1238
  hofFile=_escape_filename(self.equation_file),
 
391
  select_k_features=None,
392
  warmup_maxsize_by=0.0,
393
  constraints=None,
394
+ nested_constraints=None,
395
  use_frequency=True,
396
  use_frequency_in_tournament=True,
397
  tempdir=None,
 
512
  :type warmup_maxsize_by: float
513
  :param constraints: dictionary of int (unary) or 2-tuples (binary), this enforces maxsize constraints on the individual arguments of operators. E.g., `'pow': (-1, 1)` says that power laws can have any complexity left argument, but only 1 complexity exponent. Use this to force more interpretable solutions.
514
  :type constraints: dict
515
+ :param nested_constraints: Specifies how many times a combination of operators can be nested. For example,
516
+ `{"sin": {"cos": 0}, "cos": {"cos": 2}}` specifies that `cos` may never appear within a `sin`,
517
+ but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos`
518
+ can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination
519
+ of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified,
520
+ it is assumed that it can be nested an unlimited number of times. This requires that there is no operator
521
+ which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation).
522
+ For binary operators, you only need to provide a single number: both arguments are treated the same way,
523
+ and the max of each argument is constrained.
524
+ :type nested_constraints: dict
525
  :param use_frequency: whether to measure the frequency of complexities, and use that instead of parsimony to explore equation space. Will naturally find equations of all complexities.
526
  :type use_frequency: bool
527
  :param use_frequency_in_tournament: whether to use the frequency mentioned above in the tournament, rather than just the simulated annealing.
 
717
  select_k_features=select_k_features,
718
  warmup_maxsize_by=warmup_maxsize_by,
719
  constraints=constraints,
720
+ nested_constraints=nested_constraints,
721
  use_frequency=use_frequency,
722
  use_frequency_in_tournament=use_frequency_in_tournament,
723
  tempdir=tempdir,
 
1164
 
1165
  una_constraints = [constraints[op] for op in unary_operators]
1166
  bin_constraints = [constraints[op] for op in binary_operators]
1167
+ nested_constraints = self.params["nested_constraints"]
1168
+ if nested_constraints is not None:
1169
+ # Parse dict into Julia Dict:
1170
+ nested_constraints_str = "Dict("
1171
+ for outer_k, outer_v in nested_constraints.items():
1172
+ nested_constraints_str += f"({outer_k}) => Dict("
1173
+ for inner_k, inner_v in outer_v.items():
1174
+ nested_constraints_str += f"({inner_k}) => {inner_v}, "
1175
+ nested_constraints_str += "), "
1176
+ nested_constraints_str += ")"
1177
+ nested_constraints = Main.eval(nested_constraints_str)
1178
+
1179
 
1180
  if not already_ran:
1181
  Main.eval("using Pkg")
 
1257
  unary_operators=Main.eval(str(tuple(unary_operators)).replace("'", "")),
1258
  bin_constraints=bin_constraints,
1259
  una_constraints=una_constraints,
1260
+ nested_constraints=nested_constraints,
1261
  loss=Main.custom_loss,
1262
  maxsize=int(maxsize),
1263
  hofFile=_escape_filename(self.equation_file),
test/test.py CHANGED
@@ -145,7 +145,7 @@ class TestPipeline(unittest.TestCase):
145
  self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
146
  self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
147
 
148
- def test_pandas_resample(self):
149
  X = pd.DataFrame(
150
  {
151
  "T": self.rstate.randn(500),
@@ -174,6 +174,7 @@ class TestPipeline(unittest.TestCase):
174
  Xresampled=Xresampled,
175
  denoise=True,
176
  select_k_features=2,
 
177
  )
178
  model.fit(X, y)
179
  self.assertNotIn("unused_feature", model.latex())
 
145
  self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
146
  self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
147
 
148
+ def test_pandas_resample_with_nested_constraints(self):
149
  X = pd.DataFrame(
150
  {
151
  "T": self.rstate.randn(500),
 
174
  Xresampled=Xresampled,
175
  denoise=True,
176
  select_k_features=2,
177
+ nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}}
178
  )
179
  model.fit(X, y)
180
  self.assertNotIn("unused_feature", model.latex())