MilesCranmer committed on
Commit
af0be92
·
unverified ·
1 Parent(s): 0e15dd6

Add print_precision and dimensional_constraint_penalty

Browse files
Files changed (3) hide show
  1. docs/param_groupings.yml +3 -0
  2. pysr/sr.py +11 -0
  3. pysr/test/test.py +33 -6
docs/param_groupings.yml CHANGED
@@ -13,6 +13,7 @@
13
  - loss
14
  - full_objective
15
  - model_selection
 
16
  - Working with Complexities:
17
  - parsimony
18
  - constraints
@@ -72,12 +73,14 @@
72
  - fast_cycle
73
  - turbo
74
  - enable_autodiff
 
75
  - random_state
76
  - deterministic
77
  - warm_start
78
  - Monitoring:
79
  - verbosity
80
  - update_verbosity
 
81
  - progress
82
  - Environment:
83
  - temp_equation_file
 
13
  - loss
14
  - full_objective
15
  - model_selection
16
+ - dimensional_constraint_penalty
17
  - Working with Complexities:
18
  - parsimony
19
  - constraints
 
73
  - fast_cycle
74
  - turbo
75
  - enable_autodiff
76
+ - Determinism:
77
  - random_state
78
  - deterministic
79
  - warm_start
80
  - Monitoring:
81
  - verbosity
82
  - update_verbosity
83
+ - print_precision
84
  - progress
85
  - Environment:
86
  - temp_equation_file
pysr/sr.py CHANGED
@@ -391,6 +391,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
391
  parsimony : float
392
  Multiplicative factor for how much to punish complexity.
393
  Default is `0.0032`.
 
 
 
394
  use_frequency : bool
395
  Whether to measure the frequency of complexities, and use that
396
  instead of parsimony to explore equation space. Will naturally
@@ -571,6 +574,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
571
  What verbosity level to use for package updates.
572
  Will take value of `verbosity` if not given.
573
  Default is `None`.
 
 
574
  progress : bool
575
  Whether to use a progress bar instead of printing to stdout.
576
  Default is `True`.
@@ -738,6 +743,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
738
  complexity_of_constants=1,
739
  complexity_of_variables=1,
740
  parsimony=0.0032,
 
741
  use_frequency=True,
742
  use_frequency_in_tournament=True,
743
  adaptive_parsimony_scaling=20.0,
@@ -784,6 +790,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
784
  warm_start=False,
785
  verbosity=1e9,
786
  update_verbosity=None,
 
787
  progress=True,
788
  equation_file=None,
789
  temp_equation_file=False,
@@ -828,6 +835,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
828
  self.complexity_of_constants = complexity_of_constants
829
  self.complexity_of_variables = complexity_of_variables
830
  self.parsimony = parsimony
 
831
  self.use_frequency = use_frequency
832
  self.use_frequency_in_tournament = use_frequency_in_tournament
833
  self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
@@ -879,6 +887,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
879
  # - Runtime user interface
880
  self.verbosity = verbosity
881
  self.update_verbosity = update_verbosity
 
882
  self.progress = progress
883
  # - Project management
884
  self.equation_file = equation_file
@@ -1699,6 +1708,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1699
  tournament_selection_n=self.tournament_selection_n,
1700
  # These have the same name:
1701
  parsimony=self.parsimony,
 
1702
  alpha=self.alpha,
1703
  maxdepth=maxdepth,
1704
  fast_cycle=self.fast_cycle,
@@ -1718,6 +1728,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1718
  fraction_replaced=self.fraction_replaced,
1719
  topn=self.topn,
1720
  verbosity=self.verbosity,
 
1721
  optimizer_algorithm=self.optimizer_algorithm,
1722
  optimizer_nrestarts=self.optimizer_nrestarts,
1723
  optimizer_probability=self.optimize_probability,
 
391
  parsimony : float
392
  Multiplicative factor for how much to punish complexity.
393
  Default is `0.0032`.
394
+ dimensional_constraint_penalty : float
395
+ Additive penalty applied when dimensional analysis of an expression fails.
396
+ By default, this is `1000.0`.
397
  use_frequency : bool
398
  Whether to measure the frequency of complexities, and use that
399
  instead of parsimony to explore equation space. Will naturally
 
574
  What verbosity level to use for package updates.
575
  Will take value of `verbosity` if not given.
576
  Default is `None`.
577
+ print_precision : int
578
+ How many significant digits to print for floats. Default is `5`.
579
  progress : bool
580
  Whether to use a progress bar instead of printing to stdout.
581
  Default is `True`.
 
743
  complexity_of_constants=1,
744
  complexity_of_variables=1,
745
  parsimony=0.0032,
746
+ dimensional_constraint_penalty=None,
747
  use_frequency=True,
748
  use_frequency_in_tournament=True,
749
  adaptive_parsimony_scaling=20.0,
 
790
  warm_start=False,
791
  verbosity=1e9,
792
  update_verbosity=None,
793
+ print_precision=5,
794
  progress=True,
795
  equation_file=None,
796
  temp_equation_file=False,
 
835
  self.complexity_of_constants = complexity_of_constants
836
  self.complexity_of_variables = complexity_of_variables
837
  self.parsimony = parsimony
838
+ self.dimensional_constraint_penalty = dimensional_constraint_penalty
839
  self.use_frequency = use_frequency
840
  self.use_frequency_in_tournament = use_frequency_in_tournament
841
  self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
 
887
  # - Runtime user interface
888
  self.verbosity = verbosity
889
  self.update_verbosity = update_verbosity
890
+ self.print_precision = print_precision
891
  self.progress = progress
892
  # - Project management
893
  self.equation_file = equation_file
 
1708
  tournament_selection_n=self.tournament_selection_n,
1709
  # These have the same name:
1710
  parsimony=self.parsimony,
1711
+ dimensional_constraint_penalty=self.dimensional_constraint_penalty,
1712
  alpha=self.alpha,
1713
  maxdepth=maxdepth,
1714
  fast_cycle=self.fast_cycle,
 
1728
  fraction_replaced=self.fraction_replaced,
1729
  topn=self.topn,
1730
  verbosity=self.verbosity,
1731
+ print_precision=self.print_precision,
1732
  optimizer_algorithm=self.optimizer_algorithm,
1733
  optimizer_nrestarts=self.optimizer_nrestarts,
1734
  optimizer_probability=self.optimize_probability,
pysr/test/test.py CHANGED
@@ -10,6 +10,7 @@ import pandas as pd
10
  import warnings
11
  import pickle as pkl
12
  import tempfile
 
13
  from pathlib import Path
14
 
15
  from .. import julia_helpers
@@ -712,6 +713,35 @@ class TestMiscellaneous(unittest.TestCase):
712
  # If any checks failed don't let the test pass.
713
  self.assertEqual(len(exception_messages), 0)
714
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
715
 
716
  TRUE_PREAMBLE = "\n".join(
717
  [
@@ -944,7 +974,9 @@ class TestDimensionalConstraints(unittest.TestCase):
944
  for i in range(2):
945
  self.assertGreater(model.get_best()[i]["complexity"], 2)
946
  self.assertLess(model.get_best()[i]["loss"], 1e-6)
947
- self.assertGreater(model.equations_[i].query("complexity <= 2").loss.min(), 1e-6)
 
 
948
 
949
  def test_unit_checks(self):
950
  """This just checks the number of units passed"""
@@ -1013,11 +1045,6 @@ class TestDimensionalConstraints(unittest.TestCase):
1013
  self.assertEqual(best["complexity"], 3)
1014
 
1015
 
1016
- # TODO: add tests for:
1017
- # - no constants, so that it needs to find the right fraction
1018
- # - custom dimensional_constraint_penalty
1019
-
1020
-
1021
  def runtests():
1022
  """Run all tests in test.py."""
1023
  suite = unittest.TestSuite()
 
10
  import warnings
11
  import pickle as pkl
12
  import tempfile
13
+ import yaml
14
  from pathlib import Path
15
 
16
  from .. import julia_helpers
 
713
  # If any checks failed don't let the test pass.
714
  self.assertEqual(len(exception_messages), 0)
715
 
716
+ def test_param_groupings(self):
717
+ """Test that param_groupings are complete"""
718
+ param_groupings_file = (
719
+ Path(__file__).parent.parent.parent / "docs" / "param_groupings.yml"
720
+ )
721
+ # Read the file:
722
+ with open(param_groupings_file, "r") as f:
723
+ param_groupings = yaml.load(f, Loader=yaml.SafeLoader)
724
+
725
+ # Get all leafs of this yaml file:
726
+ def get_leafs(d):
727
+ if isinstance(d, dict):
728
+ for v in d.values():
729
+ yield from get_leafs(v)
730
+ elif isinstance(d, list):
731
+ for v in d:
732
+ yield from get_leafs(v)
733
+ else:
734
+ yield d
735
+
736
+ leafs = list(get_leafs(param_groupings))
737
+
738
+ regressor_params = [
739
+ p for p in DEFAULT_PARAMS.keys() if p not in ["self", "kwargs"]
740
+ ]
741
+
742
+ # Check the sets are equal:
743
+ self.assertSetEqual(set(leafs), set(regressor_params))
744
+
745
 
746
  TRUE_PREAMBLE = "\n".join(
747
  [
 
974
  for i in range(2):
975
  self.assertGreater(model.get_best()[i]["complexity"], 2)
976
  self.assertLess(model.get_best()[i]["loss"], 1e-6)
977
+ self.assertGreater(
978
+ model.equations_[i].query("complexity <= 2").loss.min(), 1e-6
979
+ )
980
 
981
  def test_unit_checks(self):
982
  """This just checks the number of units passed"""
 
1045
  self.assertEqual(best["complexity"], 3)
1046
 
1047
 
 
 
 
 
 
1048
  def runtests():
1049
  """Run all tests in test.py."""
1050
  suite = unittest.TestSuite()