Spaces:
Sleeping
Sleeping
MilesCranmer
committed on
Commit
•
50f37a0
1
Parent(s):
9556e73
Add nested_constraints feature
Browse files- pysr/sr.py +25 -0
- test/test.py +2 -1
pysr/sr.py
CHANGED
@@ -391,6 +391,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
391 |
select_k_features=None,
|
392 |
warmup_maxsize_by=0.0,
|
393 |
constraints=None,
|
|
|
394 |
use_frequency=True,
|
395 |
use_frequency_in_tournament=True,
|
396 |
tempdir=None,
|
@@ -511,6 +512,16 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
511 |
:type warmup_maxsize_by: float
|
512 |
:param constraints: dictionary of int (unary) or 2-tuples (binary), this enforces maxsize constraints on the individual arguments of operators. E.g., `'pow': (-1, 1)` says that power laws can have any complexity left argument, but only 1 complexity exponent. Use this to force more interpretable solutions.
|
513 |
:type constraints: dict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
514 |
:param use_frequency: whether to measure the frequency of complexities, and use that instead of parsimony to explore equation space. Will naturally find equations of all complexities.
|
515 |
:type use_frequency: bool
|
516 |
:param use_frequency_in_tournament: whether to use the frequency mentioned above in the tournament, rather than just the simulated annealing.
|
@@ -706,6 +717,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
706 |
select_k_features=select_k_features,
|
707 |
warmup_maxsize_by=warmup_maxsize_by,
|
708 |
constraints=constraints,
|
|
|
709 |
use_frequency=use_frequency,
|
710 |
use_frequency_in_tournament=use_frequency_in_tournament,
|
711 |
tempdir=tempdir,
|
@@ -1152,6 +1164,18 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
1152 |
|
1153 |
una_constraints = [constraints[op] for op in unary_operators]
|
1154 |
bin_constraints = [constraints[op] for op in binary_operators]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1155 |
|
1156 |
if not already_ran:
|
1157 |
Main.eval("using Pkg")
|
@@ -1233,6 +1257,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
1233 |
unary_operators=Main.eval(str(tuple(unary_operators)).replace("'", "")),
|
1234 |
bin_constraints=bin_constraints,
|
1235 |
una_constraints=una_constraints,
|
|
|
1236 |
loss=Main.custom_loss,
|
1237 |
maxsize=int(maxsize),
|
1238 |
hofFile=_escape_filename(self.equation_file),
|
|
|
391 |
select_k_features=None,
|
392 |
warmup_maxsize_by=0.0,
|
393 |
constraints=None,
|
394 |
+
nested_constraints=None,
|
395 |
use_frequency=True,
|
396 |
use_frequency_in_tournament=True,
|
397 |
tempdir=None,
|
|
|
512 |
:type warmup_maxsize_by: float
|
513 |
:param constraints: dictionary of int (unary) or 2-tuples (binary), this enforces maxsize constraints on the individual arguments of operators. E.g., `'pow': (-1, 1)` says that power laws can have any complexity left argument, but only 1 complexity exponent. Use this to force more interpretable solutions.
|
514 |
:type constraints: dict
|
515 |
+
:param nested_constraints: Specifies how many times a combination of operators can be nested. For example,
|
516 |
+
`{"sin": {"cos": 0}, "cos": {"cos": 2}}` specifies that `cos` may never appear within a `sin`,
|
517 |
+
but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos`
|
518 |
+
can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination
|
519 |
+
of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified,
|
520 |
+
it is assumed that it can be nested an unlimited number of times. This requires that there is no operator
|
521 |
+
which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation).
|
522 |
+
For binary operators, you only need to provide a single number: both arguments are treated the same way,
|
523 |
+
and the max of each argument is constrained.
|
524 |
+
:type nested_constraints: dict
|
525 |
:param use_frequency: whether to measure the frequency of complexities, and use that instead of parsimony to explore equation space. Will naturally find equations of all complexities.
|
526 |
:type use_frequency: bool
|
527 |
:param use_frequency_in_tournament: whether to use the frequency mentioned above in the tournament, rather than just the simulated annealing.
|
|
|
717 |
select_k_features=select_k_features,
|
718 |
warmup_maxsize_by=warmup_maxsize_by,
|
719 |
constraints=constraints,
|
720 |
+
nested_constraints=nested_constraints,
|
721 |
use_frequency=use_frequency,
|
722 |
use_frequency_in_tournament=use_frequency_in_tournament,
|
723 |
tempdir=tempdir,
|
|
|
1164 |
|
1165 |
una_constraints = [constraints[op] for op in unary_operators]
|
1166 |
bin_constraints = [constraints[op] for op in binary_operators]
|
1167 |
+
nested_constraints = self.params["nested_constraints"]
|
1168 |
+
if nested_constraints is not None:
|
1169 |
+
# Parse dict into Julia Dict:
|
1170 |
+
nested_constraints_str = "Dict("
|
1171 |
+
for outer_k, outer_v in nested_constraints.items():
|
1172 |
+
nested_constraints_str += f"({outer_k}) => Dict("
|
1173 |
+
for inner_k, inner_v in outer_v.items():
|
1174 |
+
nested_constraints_str += f"({inner_k}) => {inner_v}, "
|
1175 |
+
nested_constraints_str += "), "
|
1176 |
+
nested_constraints_str += ")"
|
1177 |
+
nested_constraints = Main.eval(nested_constraints_str)
|
1178 |
+
|
1179 |
|
1180 |
if not already_ran:
|
1181 |
Main.eval("using Pkg")
|
|
|
1257 |
unary_operators=Main.eval(str(tuple(unary_operators)).replace("'", "")),
|
1258 |
bin_constraints=bin_constraints,
|
1259 |
una_constraints=una_constraints,
|
1260 |
+
nested_constraints=nested_constraints,
|
1261 |
loss=Main.custom_loss,
|
1262 |
maxsize=int(maxsize),
|
1263 |
hofFile=_escape_filename(self.equation_file),
|
test/test.py
CHANGED
@@ -145,7 +145,7 @@ class TestPipeline(unittest.TestCase):
|
|
145 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
146 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
147 |
|
148 |
-
def
|
149 |
X = pd.DataFrame(
|
150 |
{
|
151 |
"T": self.rstate.randn(500),
|
@@ -174,6 +174,7 @@ class TestPipeline(unittest.TestCase):
|
|
174 |
Xresampled=Xresampled,
|
175 |
denoise=True,
|
176 |
select_k_features=2,
|
|
|
177 |
)
|
178 |
model.fit(X, y)
|
179 |
self.assertNotIn("unused_feature", model.latex())
|
|
|
145 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
146 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
147 |
|
148 |
+
def test_pandas_resample_with_nested_constraints(self):
|
149 |
X = pd.DataFrame(
|
150 |
{
|
151 |
"T": self.rstate.randn(500),
|
|
|
174 |
Xresampled=Xresampled,
|
175 |
denoise=True,
|
176 |
select_k_features=2,
|
177 |
+
nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}}
|
178 |
)
|
179 |
model.fit(X, y)
|
180 |
self.assertNotIn("unused_feature", model.latex())
|