Spaces:
Sleeping
Sleeping
MilesCranmer
committed on
Commit
•
502e3ec
1
Parent(s):
3ffd1fe
Add `bumper` parameter
Browse files
- README.md +1 -1
- docs/tuning.md +1 -1
- pysr/julia_extensions.py +2 -0
- pysr/param_groupings.yml +1 -0
- pysr/sr.py +8 -0
- pysr/test/test.py +2 -1
README.md
CHANGED
@@ -287,7 +287,7 @@ model = PySRRegressor(
|
|
287 |
# ^ Higher precision calculations.
|
288 |
warm_start=True,
|
289 |
# ^ Start from where left off.
|
290 |
-
|
291 |
# ^ Faster evaluation (experimental)
|
292 |
julia_project=None,
|
293 |
# ^ Can set to the path of a folder containing the
|
|
|
287 |
# ^ Higher precision calculations.
|
288 |
warm_start=True,
|
289 |
# ^ Start from where left off.
|
290 |
+
bumper=True,
|
291 |
# ^ Faster evaluation (experimental)
|
292 |
julia_project=None,
|
293 |
# ^ Can set to the path of a folder containing the
|
docs/tuning.md
CHANGED
@@ -20,7 +20,7 @@ I run from IPython (Jupyter Notebooks don't work as well[^1]) on the head node o
|
|
20 |
8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
|
21 |
9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
|
22 |
10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
|
23 |
-
11. Set `
|
24 |
12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
|
25 |
|
26 |
Since I am running in IPython, I can just hit `q` and then `<enter>` to stop the job, tweak the hyperparameters, and then start the search again.
|
|
|
20 |
8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
|
21 |
9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
|
22 |
10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
|
23 |
+
11. Set `bumper` to `True`. This turns on bump allocation but is experimental. It should give you a nice 20% speedup.
|
24 |
12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
|
25 |
|
26 |
Since I am running in IPython, I can just hit `q` and then `<enter>` to stop the job, tweak the hyperparameters, and then start the search again.
|
pysr/julia_extensions.py
CHANGED
@@ -8,6 +8,8 @@ def load_required_packages(
|
|
8 |
):
|
9 |
if turbo:
|
10 |
load_package("LoopVectorization", "bdcacae8-1622-11e9-2a5c-532679323890")
|
|
|
|
|
11 |
if enable_autodiff:
|
12 |
load_package("Zygote", "e88e6eb3-aa80-5325-afca-941959d7151f")
|
13 |
if cluster_manager is not None:
|
|
|
8 |
):
|
9 |
if turbo:
|
10 |
load_package("LoopVectorization", "bdcacae8-1622-11e9-2a5c-532679323890")
|
11 |
+
if bumper:
|
12 |
+
load_package("Bumper", "8ce10254-0962-460f-a3d8-1f77fea1446e")
|
13 |
if enable_autodiff:
|
14 |
load_package("Zygote", "e88e6eb3-aa80-5325-afca-941959d7151f")
|
15 |
if cluster_manager is not None:
|
pysr/param_groupings.yml
CHANGED
@@ -74,6 +74,7 @@
|
|
74 |
- precision
|
75 |
- fast_cycle
|
76 |
- turbo
|
|
|
77 |
- enable_autodiff
|
78 |
- Determinism:
|
79 |
- random_state
|
|
|
74 |
- precision
|
75 |
- fast_cycle
|
76 |
- turbo
|
77 |
+
- bumper
|
78 |
- enable_autodiff
|
79 |
- Determinism:
|
80 |
- random_state
|
pysr/sr.py
CHANGED
@@ -484,6 +484,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
484 |
search evaluation. Certain operators may not be supported.
|
485 |
Does not support 16-bit precision floats.
|
486 |
Default is `False`.
|
|
|
|
|
|
|
|
|
487 |
precision : int
|
488 |
What precision to use for the data. By default this is `32`
|
489 |
(float32), but you can select `64` or `16` as well, giving
|
@@ -727,6 +731,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
727 |
batch_size: int = 50,
|
728 |
fast_cycle: bool = False,
|
729 |
turbo: bool = False,
|
|
|
730 |
precision: int = 32,
|
731 |
enable_autodiff: bool = False,
|
732 |
random_state=None,
|
@@ -822,6 +827,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
822 |
self.batch_size = batch_size
|
823 |
self.fast_cycle = fast_cycle
|
824 |
self.turbo = turbo
|
|
|
825 |
self.precision = precision
|
826 |
self.enable_autodiff = enable_autodiff
|
827 |
self.random_state = random_state
|
@@ -1609,6 +1615,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1609 |
|
1610 |
load_required_packages(
|
1611 |
turbo=self.turbo,
|
|
|
1612 |
enable_autodiff=self.enable_autodiff,
|
1613 |
cluster_manager=cluster_manager,
|
1614 |
)
|
@@ -1654,6 +1661,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1654 |
maxdepth=maxdepth,
|
1655 |
fast_cycle=self.fast_cycle,
|
1656 |
turbo=self.turbo,
|
|
|
1657 |
enable_autodiff=self.enable_autodiff,
|
1658 |
migration=self.migration,
|
1659 |
hof_migration=self.hof_migration,
|
|
|
484 |
search evaluation. Certain operators may not be supported.
|
485 |
Does not support 16-bit precision floats.
|
486 |
Default is `False`.
|
487 |
+
bumper: bool
|
488 |
+
(Experimental) Whether to use Bumper.jl to speed up the search
|
489 |
+
evaluation. Does not support 16-bit precision floats.
|
490 |
+
Default is `False`.
|
491 |
precision : int
|
492 |
What precision to use for the data. By default this is `32`
|
493 |
(float32), but you can select `64` or `16` as well, giving
|
|
|
731 |
batch_size: int = 50,
|
732 |
fast_cycle: bool = False,
|
733 |
turbo: bool = False,
|
734 |
+
bumper: bool = False,
|
735 |
precision: int = 32,
|
736 |
enable_autodiff: bool = False,
|
737 |
random_state=None,
|
|
|
827 |
self.batch_size = batch_size
|
828 |
self.fast_cycle = fast_cycle
|
829 |
self.turbo = turbo
|
830 |
+
self.bumper = bumper
|
831 |
self.precision = precision
|
832 |
self.enable_autodiff = enable_autodiff
|
833 |
self.random_state = random_state
|
|
|
1615 |
|
1616 |
load_required_packages(
|
1617 |
turbo=self.turbo,
|
1618 |
+
bumper=self.bumper,
|
1619 |
enable_autodiff=self.enable_autodiff,
|
1620 |
cluster_manager=cluster_manager,
|
1621 |
)
|
|
|
1661 |
maxdepth=maxdepth,
|
1662 |
fast_cycle=self.fast_cycle,
|
1663 |
turbo=self.turbo,
|
1664 |
+
bumper=self.bumper,
|
1665 |
enable_autodiff=self.enable_autodiff,
|
1666 |
migration=self.migration,
|
1667 |
hof_migration=self.hof_migration,
|
pysr/test/test.py
CHANGED
@@ -58,12 +58,13 @@ class TestPipeline(unittest.TestCase):
|
|
58 |
model.fit(self.X, y, variable_names=["c1", "c2", "c3", "c4", "c5"])
|
59 |
self.assertIn("c1", model.equations_.iloc[-1]["equation"])
|
60 |
|
61 |
-
def
|
62 |
y = self.X[:, 0]
|
63 |
weights = np.ones_like(y)
|
64 |
model = PySRRegressor(
|
65 |
**self.default_test_kwargs,
|
66 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
|
|
|
67 |
)
|
68 |
model.fit(self.X, y, weights=weights)
|
69 |
print(model.equations_)
|
|
|
58 |
model.fit(self.X, y, variable_names=["c1", "c2", "c3", "c4", "c5"])
|
59 |
self.assertIn("c1", model.equations_.iloc[-1]["equation"])
|
60 |
|
61 |
+
def test_linear_relation_weighted_bumper(self):
|
62 |
y = self.X[:, 0]
|
63 |
weights = np.ones_like(y)
|
64 |
model = PySRRegressor(
|
65 |
**self.default_test_kwargs,
|
66 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
|
67 |
+
bumper=True,
|
68 |
)
|
69 |
model.fit(self.X, y, weights=weights)
|
70 |
print(model.equations_)
|