Spaces:
Sleeping
Sleeping
MilesCranmer
committed on
Commit
•
502e3ec
1
Parent(s):
3ffd1fe
Add `bumper` parameter
Browse files
- README.md +1 -1
- docs/tuning.md +1 -1
- pysr/julia_extensions.py +2 -0
- pysr/param_groupings.yml +1 -0
- pysr/sr.py +8 -0
- pysr/test/test.py +2 -1
README.md
CHANGED
@@ -287,7 +287,7 @@ model = PySRRegressor(
|
|
287 |
# ^ Higher precision calculations.
|
288 |
warm_start=True,
|
289 |
# ^ Start from where left off.
|
290 |
-
|
291 |
# ^ Faster evaluation (experimental)
|
292 |
julia_project=None,
|
293 |
# ^ Can set to the path of a folder containing the
|
|
|
287 |
# ^ Higher precision calculations.
|
288 |
warm_start=True,
|
289 |
# ^ Start from where left off.
|
290 |
+
bumper=True,
|
291 |
# ^ Faster evaluation (experimental)
|
292 |
julia_project=None,
|
293 |
# ^ Can set to the path of a folder containing the
|
docs/tuning.md
CHANGED
@@ -20,7 +20,7 @@ I run from IPython (Jupyter Notebooks don't work as well[^1]) on the head node o
|
|
20 |
8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
|
21 |
9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
|
22 |
10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
|
23 |
-
11. Set `
|
24 |
12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
|
25 |
|
26 |
Since I am running in IPython, I can just hit `q` and then `<enter>` to stop the job, tweak the hyperparameters, and then start the search again.
|
|
|
20 |
8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
|
21 |
9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
|
22 |
10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
|
23 |
+
11. Set `bumper` to `True`. This turns on bump allocation but is experimental. It should give you a nice 20% speedup.
|
24 |
12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
|
25 |
|
26 |
Since I am running in IPython, I can just hit `q` and then `<enter>` to stop the job, tweak the hyperparameters, and then start the search again.
|
pysr/julia_extensions.py
CHANGED
@@ -8,6 +8,8 @@ def load_required_packages(
|
|
8 |
):
|
9 |
if turbo:
|
10 |
load_package("LoopVectorization", "bdcacae8-1622-11e9-2a5c-532679323890")
|
|
|
|
|
11 |
if enable_autodiff:
|
12 |
load_package("Zygote", "e88e6eb3-aa80-5325-afca-941959d7151f")
|
13 |
if cluster_manager is not None:
|
|
|
8 |
):
|
9 |
if turbo:
|
10 |
load_package("LoopVectorization", "bdcacae8-1622-11e9-2a5c-532679323890")
|
11 |
+
if bumper:
|
12 |
+
load_package("Bumper", "8ce10254-0962-460f-a3d8-1f77fea1446e")
|
13 |
if enable_autodiff:
|
14 |
load_package("Zygote", "e88e6eb3-aa80-5325-afca-941959d7151f")
|
15 |
if cluster_manager is not None:
|
pysr/param_groupings.yml
CHANGED
@@ -74,6 +74,7 @@
|
|
74 |
- precision
|
75 |
- fast_cycle
|
76 |
- turbo
|
|
|
77 |
- enable_autodiff
|
78 |
- Determinism:
|
79 |
- random_state
|
|
|
74 |
- precision
|
75 |
- fast_cycle
|
76 |
- turbo
|
77 |
+
- bumper
|
78 |
- enable_autodiff
|
79 |
- Determinism:
|
80 |
- random_state
|
pysr/sr.py
CHANGED
@@ -484,6 +484,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
484 |
search evaluation. Certain operators may not be supported.
|
485 |
Does not support 16-bit precision floats.
|
486 |
Default is `False`.
|
|
|
|
|
|
|
|
|
487 |
precision : int
|
488 |
What precision to use for the data. By default this is `32`
|
489 |
(float32), but you can select `64` or `16` as well, giving
|
@@ -727,6 +731,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
727 |
batch_size: int = 50,
|
728 |
fast_cycle: bool = False,
|
729 |
turbo: bool = False,
|
|
|
730 |
precision: int = 32,
|
731 |
enable_autodiff: bool = False,
|
732 |
random_state=None,
|
@@ -822,6 +827,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
822 |
self.batch_size = batch_size
|
823 |
self.fast_cycle = fast_cycle
|
824 |
self.turbo = turbo
|
|
|
825 |
self.precision = precision
|
826 |
self.enable_autodiff = enable_autodiff
|
827 |
self.random_state = random_state
|
@@ -1609,6 +1615,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1609 |
|
1610 |
load_required_packages(
|
1611 |
turbo=self.turbo,
|
|
|
1612 |
enable_autodiff=self.enable_autodiff,
|
1613 |
cluster_manager=cluster_manager,
|
1614 |
)
|
@@ -1654,6 +1661,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1654 |
maxdepth=maxdepth,
|
1655 |
fast_cycle=self.fast_cycle,
|
1656 |
turbo=self.turbo,
|
|
|
1657 |
enable_autodiff=self.enable_autodiff,
|
1658 |
migration=self.migration,
|
1659 |
hof_migration=self.hof_migration,
|
|
|
484 |
search evaluation. Certain operators may not be supported.
|
485 |
Does not support 16-bit precision floats.
|
486 |
Default is `False`.
|
487 |
+
bumper: bool
|
488 |
+
(Experimental) Whether to use Bumper.jl to speed up the search
|
489 |
+
evaluation. Does not support 16-bit precision floats.
|
490 |
+
Default is `False`.
|
491 |
precision : int
|
492 |
What precision to use for the data. By default this is `32`
|
493 |
(float32), but you can select `64` or `16` as well, giving
|
|
|
731 |
batch_size: int = 50,
|
732 |
fast_cycle: bool = False,
|
733 |
turbo: bool = False,
|
734 |
+
bumper: bool = False,
|
735 |
precision: int = 32,
|
736 |
enable_autodiff: bool = False,
|
737 |
random_state=None,
|
|
|
827 |
self.batch_size = batch_size
|
828 |
self.fast_cycle = fast_cycle
|
829 |
self.turbo = turbo
|
830 |
+
self.bumper = bumper
|
831 |
self.precision = precision
|
832 |
self.enable_autodiff = enable_autodiff
|
833 |
self.random_state = random_state
|
|
|
1615 |
|
1616 |
load_required_packages(
|
1617 |
turbo=self.turbo,
|
1618 |
+
bumper=self.bumper,
|
1619 |
enable_autodiff=self.enable_autodiff,
|
1620 |
cluster_manager=cluster_manager,
|
1621 |
)
|
|
|
1661 |
maxdepth=maxdepth,
|
1662 |
fast_cycle=self.fast_cycle,
|
1663 |
turbo=self.turbo,
|
1664 |
+
bumper=self.bumper,
|
1665 |
enable_autodiff=self.enable_autodiff,
|
1666 |
migration=self.migration,
|
1667 |
hof_migration=self.hof_migration,
|
pysr/test/test.py
CHANGED
@@ -58,12 +58,13 @@ class TestPipeline(unittest.TestCase):
|
|
58 |
model.fit(self.X, y, variable_names=["c1", "c2", "c3", "c4", "c5"])
|
59 |
self.assertIn("c1", model.equations_.iloc[-1]["equation"])
|
60 |
|
61 |
-
def
|
62 |
y = self.X[:, 0]
|
63 |
weights = np.ones_like(y)
|
64 |
model = PySRRegressor(
|
65 |
**self.default_test_kwargs,
|
66 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
|
|
|
67 |
)
|
68 |
model.fit(self.X, y, weights=weights)
|
69 |
print(model.equations_)
|
|
|
58 |
model.fit(self.X, y, variable_names=["c1", "c2", "c3", "c4", "c5"])
|
59 |
self.assertIn("c1", model.equations_.iloc[-1]["equation"])
|
60 |
|
61 |
+
def test_linear_relation_weighted_bumper(self):
|
62 |
y = self.X[:, 0]
|
63 |
weights = np.ones_like(y)
|
64 |
model = PySRRegressor(
|
65 |
**self.default_test_kwargs,
|
66 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
|
67 |
+
bumper=True,
|
68 |
)
|
69 |
model.fit(self.X, y, weights=weights)
|
70 |
print(model.equations_)
|