Spaces:
Running
Running
MilesCranmer
committed on
Add print_precision and dimensional_constraint_penalty
Browse files
- docs/param_groupings.yml +3 -0
- pysr/sr.py +11 -0
- pysr/test/test.py +33 -6
docs/param_groupings.yml
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
- loss
|
14 |
- full_objective
|
15 |
- model_selection
|
|
|
16 |
- Working with Complexities:
|
17 |
- parsimony
|
18 |
- constraints
|
@@ -72,12 +73,14 @@
|
|
72 |
- fast_cycle
|
73 |
- turbo
|
74 |
- enable_autodiff
|
|
|
75 |
- random_state
|
76 |
- deterministic
|
77 |
- warm_start
|
78 |
- Monitoring:
|
79 |
- verbosity
|
80 |
- update_verbosity
|
|
|
81 |
- progress
|
82 |
- Environment:
|
83 |
- temp_equation_file
|
|
|
13 |
- loss
|
14 |
- full_objective
|
15 |
- model_selection
|
16 |
+
- dimensional_constraint_penalty
|
17 |
- Working with Complexities:
|
18 |
- parsimony
|
19 |
- constraints
|
|
|
73 |
- fast_cycle
|
74 |
- turbo
|
75 |
- enable_autodiff
|
76 |
+
- Determinism:
|
77 |
- random_state
|
78 |
- deterministic
|
79 |
- warm_start
|
80 |
- Monitoring:
|
81 |
- verbosity
|
82 |
- update_verbosity
|
83 |
+
- print_precision
|
84 |
- progress
|
85 |
- Environment:
|
86 |
- temp_equation_file
|
pysr/sr.py
CHANGED
@@ -391,6 +391,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
391 |
parsimony : float
|
392 |
Multiplicative factor for how much to punish complexity.
|
393 |
Default is `0.0032`.
|
|
|
|
|
|
|
394 |
use_frequency : bool
|
395 |
Whether to measure the frequency of complexities, and use that
|
396 |
instead of parsimony to explore equation space. Will naturally
|
@@ -571,6 +574,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
571 |
What verbosity level to use for package updates.
|
572 |
Will take value of `verbosity` if not given.
|
573 |
Default is `None`.
|
|
|
|
|
574 |
progress : bool
|
575 |
Whether to use a progress bar instead of printing to stdout.
|
576 |
Default is `True`.
|
@@ -738,6 +743,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
738 |
complexity_of_constants=1,
|
739 |
complexity_of_variables=1,
|
740 |
parsimony=0.0032,
|
|
|
741 |
use_frequency=True,
|
742 |
use_frequency_in_tournament=True,
|
743 |
adaptive_parsimony_scaling=20.0,
|
@@ -784,6 +790,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
784 |
warm_start=False,
|
785 |
verbosity=1e9,
|
786 |
update_verbosity=None,
|
|
|
787 |
progress=True,
|
788 |
equation_file=None,
|
789 |
temp_equation_file=False,
|
@@ -828,6 +835,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
828 |
self.complexity_of_constants = complexity_of_constants
|
829 |
self.complexity_of_variables = complexity_of_variables
|
830 |
self.parsimony = parsimony
|
|
|
831 |
self.use_frequency = use_frequency
|
832 |
self.use_frequency_in_tournament = use_frequency_in_tournament
|
833 |
self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
|
@@ -879,6 +887,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
879 |
# - Runtime user interface
|
880 |
self.verbosity = verbosity
|
881 |
self.update_verbosity = update_verbosity
|
|
|
882 |
self.progress = progress
|
883 |
# - Project management
|
884 |
self.equation_file = equation_file
|
@@ -1699,6 +1708,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1699 |
tournament_selection_n=self.tournament_selection_n,
|
1700 |
# These have the same name:
|
1701 |
parsimony=self.parsimony,
|
|
|
1702 |
alpha=self.alpha,
|
1703 |
maxdepth=maxdepth,
|
1704 |
fast_cycle=self.fast_cycle,
|
@@ -1718,6 +1728,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1718 |
fraction_replaced=self.fraction_replaced,
|
1719 |
topn=self.topn,
|
1720 |
verbosity=self.verbosity,
|
|
|
1721 |
optimizer_algorithm=self.optimizer_algorithm,
|
1722 |
optimizer_nrestarts=self.optimizer_nrestarts,
|
1723 |
optimizer_probability=self.optimize_probability,
|
|
|
391 |
parsimony : float
|
392 |
Multiplicative factor for how much to punish complexity.
|
393 |
Default is `0.0032`.
|
394 |
+
dimensional_constraint_penalty : float
|
395 |
+
Additive penalty applied when dimensional analysis of an expression fails.
|
396 |
+
By default, this is `1000.0`.
|
397 |
use_frequency : bool
|
398 |
Whether to measure the frequency of complexities, and use that
|
399 |
instead of parsimony to explore equation space. Will naturally
|
|
|
574 |
What verbosity level to use for package updates.
|
575 |
Will take value of `verbosity` if not given.
|
576 |
Default is `None`.
|
577 |
+
print_precision : int
|
578 |
+
How many significant digits to print for floats. Default is `5`.
|
579 |
progress : bool
|
580 |
Whether to use a progress bar instead of printing to stdout.
|
581 |
Default is `True`.
|
|
|
743 |
complexity_of_constants=1,
|
744 |
complexity_of_variables=1,
|
745 |
parsimony=0.0032,
|
746 |
+
dimensional_constraint_penalty=None,
|
747 |
use_frequency=True,
|
748 |
use_frequency_in_tournament=True,
|
749 |
adaptive_parsimony_scaling=20.0,
|
|
|
790 |
warm_start=False,
|
791 |
verbosity=1e9,
|
792 |
update_verbosity=None,
|
793 |
+
print_precision=5,
|
794 |
progress=True,
|
795 |
equation_file=None,
|
796 |
temp_equation_file=False,
|
|
|
835 |
self.complexity_of_constants = complexity_of_constants
|
836 |
self.complexity_of_variables = complexity_of_variables
|
837 |
self.parsimony = parsimony
|
838 |
+
self.dimensional_constraint_penalty = dimensional_constraint_penalty
|
839 |
self.use_frequency = use_frequency
|
840 |
self.use_frequency_in_tournament = use_frequency_in_tournament
|
841 |
self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
|
|
|
887 |
# - Runtime user interface
|
888 |
self.verbosity = verbosity
|
889 |
self.update_verbosity = update_verbosity
|
890 |
+
self.print_precision = print_precision
|
891 |
self.progress = progress
|
892 |
# - Project management
|
893 |
self.equation_file = equation_file
|
|
|
1708 |
tournament_selection_n=self.tournament_selection_n,
|
1709 |
# These have the same name:
|
1710 |
parsimony=self.parsimony,
|
1711 |
+
dimensional_constraint_penalty=self.dimensional_constraint_penalty,
|
1712 |
alpha=self.alpha,
|
1713 |
maxdepth=maxdepth,
|
1714 |
fast_cycle=self.fast_cycle,
|
|
|
1728 |
fraction_replaced=self.fraction_replaced,
|
1729 |
topn=self.topn,
|
1730 |
verbosity=self.verbosity,
|
1731 |
+
print_precision=self.print_precision,
|
1732 |
optimizer_algorithm=self.optimizer_algorithm,
|
1733 |
optimizer_nrestarts=self.optimizer_nrestarts,
|
1734 |
optimizer_probability=self.optimize_probability,
|
pysr/test/test.py
CHANGED
@@ -10,6 +10,7 @@ import pandas as pd
|
|
10 |
import warnings
|
11 |
import pickle as pkl
|
12 |
import tempfile
|
|
|
13 |
from pathlib import Path
|
14 |
|
15 |
from .. import julia_helpers
|
@@ -712,6 +713,35 @@ class TestMiscellaneous(unittest.TestCase):
|
|
712 |
# If any checks failed don't let the test pass.
|
713 |
self.assertEqual(len(exception_messages), 0)
|
714 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
715 |
|
716 |
TRUE_PREAMBLE = "\n".join(
|
717 |
[
|
@@ -944,7 +974,9 @@ class TestDimensionalConstraints(unittest.TestCase):
|
|
944 |
for i in range(2):
|
945 |
self.assertGreater(model.get_best()[i]["complexity"], 2)
|
946 |
self.assertLess(model.get_best()[i]["loss"], 1e-6)
|
947 |
-
self.assertGreater(
|
|
|
|
|
948 |
|
949 |
def test_unit_checks(self):
|
950 |
"""This just checks the number of units passed"""
|
@@ -1013,11 +1045,6 @@ class TestDimensionalConstraints(unittest.TestCase):
|
|
1013 |
self.assertEqual(best["complexity"], 3)
|
1014 |
|
1015 |
|
1016 |
-
# TODO: add tests for:
|
1017 |
-
# - no constants, so that it needs to find the right fraction
|
1018 |
-
# - custom dimensional_constraint_penalty
|
1019 |
-
|
1020 |
-
|
1021 |
def runtests():
|
1022 |
"""Run all tests in test.py."""
|
1023 |
suite = unittest.TestSuite()
|
|
|
10 |
import warnings
|
11 |
import pickle as pkl
|
12 |
import tempfile
|
13 |
+
import yaml
|
14 |
from pathlib import Path
|
15 |
|
16 |
from .. import julia_helpers
|
|
|
713 |
# If any checks failed don't let the test pass.
|
714 |
self.assertEqual(len(exception_messages), 0)
|
715 |
|
716 |
+
def test_param_groupings(self):
|
717 |
+
"""Test that param_groupings are complete"""
|
718 |
+
param_groupings_file = (
|
719 |
+
Path(__file__).parent.parent.parent / "docs" / "param_groupings.yml"
|
720 |
+
)
|
721 |
+
# Read the file:
|
722 |
+
with open(param_groupings_file, "r") as f:
|
723 |
+
param_groupings = yaml.load(f, Loader=yaml.SafeLoader)
|
724 |
+
|
725 |
+
# Get all leafs of this yaml file:
|
726 |
+
def get_leafs(d):
|
727 |
+
if isinstance(d, dict):
|
728 |
+
for v in d.values():
|
729 |
+
yield from get_leafs(v)
|
730 |
+
elif isinstance(d, list):
|
731 |
+
for v in d:
|
732 |
+
yield from get_leafs(v)
|
733 |
+
else:
|
734 |
+
yield d
|
735 |
+
|
736 |
+
leafs = list(get_leafs(param_groupings))
|
737 |
+
|
738 |
+
regressor_params = [
|
739 |
+
p for p in DEFAULT_PARAMS.keys() if p not in ["self", "kwargs"]
|
740 |
+
]
|
741 |
+
|
742 |
+
# Check the sets are equal:
|
743 |
+
self.assertSetEqual(set(leafs), set(regressor_params))
|
744 |
+
|
745 |
|
746 |
TRUE_PREAMBLE = "\n".join(
|
747 |
[
|
|
|
974 |
for i in range(2):
|
975 |
self.assertGreater(model.get_best()[i]["complexity"], 2)
|
976 |
self.assertLess(model.get_best()[i]["loss"], 1e-6)
|
977 |
+
self.assertGreater(
|
978 |
+
model.equations_[i].query("complexity <= 2").loss.min(), 1e-6
|
979 |
+
)
|
980 |
|
981 |
def test_unit_checks(self):
|
982 |
"""This just checks the number of units passed"""
|
|
|
1045 |
self.assertEqual(best["complexity"], 3)
|
1046 |
|
1047 |
|
|
|
|
|
|
|
|
|
|
|
1048 |
def runtests():
|
1049 |
"""Run all tests in test.py."""
|
1050 |
suite = unittest.TestSuite()
|