Spaces:

MilesCranmer
/

PySR

Sleeping

App Files Files Community

MilesCranmer committed on Jul 23, 2023

Commit

af0be92

unverified ·

1 Parent(s): 0e15dd6

Add print_precision and dimensional_constraint_penalty

Browse files

Files changed (3) hide show

docs/param_groupings.yml +3 -0
pysr/sr.py +11 -0
pysr/test/test.py +33 -6

docs/param_groupings.yml CHANGED Viewed

@@ -13,6 +13,7 @@
     - loss
     - full_objective
     - model_selection
   - Working with Complexities:
     - parsimony
     - constraints
@@ -72,12 +73,14 @@
   - fast_cycle
   - turbo
   - enable_autodiff
   - random_state
   - deterministic
   - warm_start
 - Monitoring:
   - verbosity
   - update_verbosity
   - progress
 - Environment:
   - temp_equation_file

     - loss
     - full_objective
     - model_selection
+    - dimensional_constraint_penalty
   - Working with Complexities:
     - parsimony
     - constraints
   - fast_cycle
   - turbo
   - enable_autodiff
+- Determinism:
   - random_state
   - deterministic
   - warm_start
 - Monitoring:
   - verbosity
   - update_verbosity
+  - print_precision
   - progress
 - Environment:
   - temp_equation_file

pysr/sr.py CHANGED Viewed

@@ -391,6 +391,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     parsimony : float
         Multiplicative factor for how much to punish complexity.
         Default is `0.0032`.
     use_frequency : bool
         Whether to measure the frequency of complexities, and use that
         instead of parsimony to explore equation space. Will naturally
@@ -571,6 +574,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         What verbosity level to use for package updates.
         Will take value of `verbosity` if not given.
         Default is `None`.
     progress : bool
         Whether to use a progress bar instead of printing to stdout.
         Default is `True`.
@@ -738,6 +743,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         complexity_of_constants=1,
         complexity_of_variables=1,
         parsimony=0.0032,
         use_frequency=True,
         use_frequency_in_tournament=True,
         adaptive_parsimony_scaling=20.0,
@@ -784,6 +790,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         warm_start=False,
         verbosity=1e9,
         update_verbosity=None,
         progress=True,
         equation_file=None,
         temp_equation_file=False,
@@ -828,6 +835,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         self.complexity_of_constants = complexity_of_constants
         self.complexity_of_variables = complexity_of_variables
         self.parsimony = parsimony
         self.use_frequency = use_frequency
         self.use_frequency_in_tournament = use_frequency_in_tournament
         self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
@@ -879,6 +887,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         # - Runtime user interface
         self.verbosity = verbosity
         self.update_verbosity = update_verbosity
         self.progress = progress
         # - Project management
         self.equation_file = equation_file
@@ -1699,6 +1708,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             tournament_selection_n=self.tournament_selection_n,
             # These have the same name:
             parsimony=self.parsimony,
             alpha=self.alpha,
             maxdepth=maxdepth,
             fast_cycle=self.fast_cycle,
@@ -1718,6 +1728,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             fraction_replaced=self.fraction_replaced,
             topn=self.topn,
             verbosity=self.verbosity,
             optimizer_algorithm=self.optimizer_algorithm,
             optimizer_nrestarts=self.optimizer_nrestarts,
             optimizer_probability=self.optimize_probability,

     parsimony : float
         Multiplicative factor for how much to punish complexity.
         Default is `0.0032`.
+    dimensional_constraint_penalty : float
+        Additive penalty applied when dimensional analysis of an expression fails.
+        By default, this is `1000.0`.
     use_frequency : bool
         Whether to measure the frequency of complexities, and use that
         instead of parsimony to explore equation space. Will naturally
         What verbosity level to use for package updates.
         Will take value of `verbosity` if not given.
         Default is `None`.
+    print_precision : int
+        How many significant digits to print for floats. Default is `5`.
     progress : bool
         Whether to use a progress bar instead of printing to stdout.
         Default is `True`.
         complexity_of_constants=1,
         complexity_of_variables=1,
         parsimony=0.0032,
+        dimensional_constraint_penalty=None,
         use_frequency=True,
         use_frequency_in_tournament=True,
         adaptive_parsimony_scaling=20.0,
         warm_start=False,
         verbosity=1e9,
         update_verbosity=None,
+        print_precision=5,
         progress=True,
         equation_file=None,
         temp_equation_file=False,
         self.complexity_of_constants = complexity_of_constants
         self.complexity_of_variables = complexity_of_variables
         self.parsimony = parsimony
+        self.dimensional_constraint_penalty = dimensional_constraint_penalty
         self.use_frequency = use_frequency
         self.use_frequency_in_tournament = use_frequency_in_tournament
         self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
         # - Runtime user interface
         self.verbosity = verbosity
         self.update_verbosity = update_verbosity
+        self.print_precision = print_precision
         self.progress = progress
         # - Project management
         self.equation_file = equation_file
             tournament_selection_n=self.tournament_selection_n,
             # These have the same name:
             parsimony=self.parsimony,
+            dimensional_constraint_penalty=self.dimensional_constraint_penalty,
             alpha=self.alpha,
             maxdepth=maxdepth,
             fast_cycle=self.fast_cycle,
             fraction_replaced=self.fraction_replaced,
             topn=self.topn,
             verbosity=self.verbosity,
+            print_precision=self.print_precision,
             optimizer_algorithm=self.optimizer_algorithm,
             optimizer_nrestarts=self.optimizer_nrestarts,
             optimizer_probability=self.optimize_probability,

pysr/test/test.py CHANGED Viewed

@@ -10,6 +10,7 @@ import pandas as pd
 import warnings
 import pickle as pkl
 import tempfile
 from pathlib import Path
 from .. import julia_helpers
@@ -712,6 +713,35 @@ class TestMiscellaneous(unittest.TestCase):
         # If any checks failed don't let the test pass.
         self.assertEqual(len(exception_messages), 0)
 TRUE_PREAMBLE = "\n".join(
     [
@@ -944,7 +974,9 @@ class TestDimensionalConstraints(unittest.TestCase):
         for i in range(2):
             self.assertGreater(model.get_best()[i]["complexity"], 2)
             self.assertLess(model.get_best()[i]["loss"], 1e-6)
-            self.assertGreater(model.equations_[i].query("complexity <= 2").loss.min(), 1e-6)
     def test_unit_checks(self):
         """This just checks the number of units passed"""
@@ -1013,11 +1045,6 @@ class TestDimensionalConstraints(unittest.TestCase):
         self.assertEqual(best["complexity"], 3)
-# TODO: add tests for:
-# - no constants, so that it needs to find the right fraction
-# - custom dimensional_constraint_penalty
 def runtests():
     """Run all tests in test.py."""
     suite = unittest.TestSuite()

 import warnings
 import pickle as pkl
 import tempfile
+import yaml
 from pathlib import Path
 from .. import julia_helpers
         # If any checks failed don't let the test pass.
         self.assertEqual(len(exception_messages), 0)
+    def test_param_groupings(self):
+        """Test that param_groupings.yml lists exactly the parameters in DEFAULT_PARAMS"""
+        param_groupings_file = (
+            Path(__file__).parent.parent.parent / "docs" / "param_groupings.yml"
+        )
+        # Read the file:
+        with open(param_groupings_file, "r") as f:
+            param_groupings = yaml.load(f, Loader=yaml.SafeLoader)
+        # Get all leaves of this YAML file:
+        def get_leafs(d):
+            if isinstance(d, dict):
+                for v in d.values():
+                    yield from get_leafs(v)
+            elif isinstance(d, list):
+                for v in d:
+                    yield from get_leafs(v)
+            else:
+                yield d
+        leafs = list(get_leafs(param_groupings))
+        regressor_params = [
+            p for p in DEFAULT_PARAMS.keys() if p not in ["self", "kwargs"]
+        ]
+        # Check the sets are equal:
+        self.assertSetEqual(set(leafs), set(regressor_params))
 TRUE_PREAMBLE = "\n".join(
     [
         for i in range(2):
             self.assertGreater(model.get_best()[i]["complexity"], 2)
             self.assertLess(model.get_best()[i]["loss"], 1e-6)
+            self.assertGreater(
+                model.equations_[i].query("complexity <= 2").loss.min(), 1e-6
+            )
     def test_unit_checks(self):
         """This just checks the number of units passed"""
         self.assertEqual(best["complexity"], 3)
 def runtests():
     """Run all tests in test.py."""
     suite = unittest.TestSuite()