Spaces:
Running
Running
MilesCranmer
committed on
Merge pull request #95 from MilesCranmer/state-saving
Browse files- README.md +3 -1
- docs/start.md +3 -1
- pysr/sr.py +46 -5
- setup.py +2 -2
- test/test.py +8 -1
README.md
CHANGED
@@ -74,7 +74,7 @@ Most common issues at this stage are solved
|
|
74 |
by [tweaking the Julia package server](https://github.com/MilesCranmer/PySR/issues/27).
|
75 |
to use up-to-date packages.
|
76 |
|
77 |
-
#
|
78 |
|
79 |
Let's create a PySR example. First, let's import
|
80 |
numpy to generate some test data:
|
@@ -144,6 +144,8 @@ This arrow in the `pick` column indicates which equation is currently selected b
|
|
144 |
SymPy format (`sympy_format`), and even JAX and PyTorch format
|
145 |
(both of which are differentiable).
|
146 |
|
|
|
|
|
147 |
There are several other useful features such as denoising (e.g., `denoising=True`),
|
148 |
feature selection (e.g., `select_k_features=3`).
|
149 |
For a summary of features and options, see [this docs page](https://pysr.readthedocs.io/en/latest/docs/options/).
|
|
|
74 |
by [tweaking the Julia package server](https://github.com/MilesCranmer/PySR/issues/27).
|
75 |
to use up-to-date packages.
|
76 |
|
77 |
+
# Introduction
|
78 |
|
79 |
Let's create a PySR example. First, let's import
|
80 |
numpy to generate some test data:
|
|
|
144 |
SymPy format (`sympy_format`), and even JAX and PyTorch format
|
145 |
(both of which are differentiable).
|
146 |
|
147 |
+
Note that `PySRRegressor` stores the state of the last search, and will restart from where you left off the next time you call `.fit()`. This will cause problems if significant changes are made to the search parameters (like changing the operators). You can run `model.reset()` to reset the state.
|
148 |
+
|
149 |
There are several other useful features such as denoising (e.g., `denoising=True`),
|
150 |
feature selection (e.g., `select_k_features=3`).
|
151 |
For a summary of features and options, see [this docs page](https://pysr.readthedocs.io/en/latest/docs/options/).
|
docs/start.md
CHANGED
@@ -19,7 +19,7 @@ Most common issues at this stage are solved
|
|
19 |
by [tweaking the Julia package server](https://github.com/MilesCranmer/PySR/issues/27).
|
20 |
to use up-to-date packages.
|
21 |
|
22 |
-
#
|
23 |
|
24 |
Let's create a PySR example. First, let's import
|
25 |
numpy to generate some test data:
|
@@ -89,6 +89,8 @@ This arrow in the `pick` column indicates which equation is currently selected b
|
|
89 |
SymPy format (`sympy_format`), and even JAX and PyTorch format
|
90 |
(both of which are differentiable).
|
91 |
|
|
|
|
|
92 |
There are several other useful features such as denoising (e.g., `denoising=True`),
|
93 |
feature selection (e.g., `select_k_features=3`).
|
94 |
For a summary of features and options, see [this docs page](https://pysr.readthedocs.io/en/latest/docs/options/).
|
|
|
19 |
by [tweaking the Julia package server](https://github.com/MilesCranmer/PySR/issues/27).
|
20 |
to use up-to-date packages.
|
21 |
|
22 |
+
# Introduction
|
23 |
|
24 |
Let's create a PySR example. First, let's import
|
25 |
numpy to generate some test data:
|
|
|
89 |
SymPy format (`sympy_format`), and even JAX and PyTorch format
|
90 |
(both of which are differentiable).
|
91 |
|
92 |
+
Note that `PySRRegressor` stores the state of the last search, and will restart from where you left off the next time you call `.fit()`. This will cause problems if significant changes are made to the search parameters (like changing the operators). You can run `model.reset()` to reset the state.
|
93 |
+
|
94 |
There are several other useful features such as denoising (e.g., `denoising=True`),
|
95 |
feature selection (e.g., `select_k_features=3`).
|
96 |
For a summary of features and options, see [this docs page](https://pysr.readthedocs.io/en/latest/docs/options/).
|
pysr/sr.py
CHANGED
@@ -12,6 +12,8 @@ from datetime import datetime
|
|
12 |
import warnings
|
13 |
from multiprocessing import cpu_count
|
14 |
from sklearn.base import BaseEstimator, RegressorMixin
|
|
|
|
|
15 |
|
16 |
is_julia_warning_silenced = False
|
17 |
|
@@ -320,7 +322,7 @@ def _write_project_file(tmp_dir):
|
|
320 |
SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
|
321 |
|
322 |
[compat]
|
323 |
-
SymbolicRegression = "0.7.
|
324 |
julia = "1.5"
|
325 |
"""
|
326 |
|
@@ -636,9 +638,10 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
636 |
|
637 |
# Stored equations:
|
638 |
self.equations = None
|
|
|
|
|
639 |
|
640 |
self.multioutput = None
|
641 |
-
self.raw_julia_output = None
|
642 |
self.equation_file = equation_file
|
643 |
self.n_features = None
|
644 |
self.extra_sympy_mappings = extra_sympy_mappings
|
@@ -654,7 +657,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
654 |
self.surface_parameters = [
|
655 |
"model_selection",
|
656 |
"multioutput",
|
657 |
-
"raw_julia_output",
|
658 |
"equation_file",
|
659 |
"n_features",
|
660 |
"extra_sympy_mappings",
|
@@ -727,7 +729,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
727 |
else:
|
728 |
self.params[key] = value
|
729 |
|
730 |
-
self.refresh()
|
731 |
return self
|
732 |
|
733 |
def get_params(self, deep=True):
|
@@ -858,6 +859,12 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
858 |
return [eq["torch_format"] for eq in best]
|
859 |
return best["torch_format"]
|
860 |
|
|
|
|
|
|
|
|
|
|
|
|
|
861 |
def _run(self, X, y, weights, variable_names):
|
862 |
global already_ran
|
863 |
global Main
|
@@ -1046,6 +1053,38 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
1046 |
float(weightDoNothing),
|
1047 |
]
|
1048 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1049 |
options = Main.Options(
|
1050 |
binary_operators=Main.eval(str(tuple(binary_operators)).replace("'", "")),
|
1051 |
unary_operators=Main.eval(str(tuple(unary_operators)).replace("'", "")),
|
@@ -1085,6 +1124,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
1085 |
optimizer_iterations=self.params["optimizer_iterations"],
|
1086 |
perturbationFactor=self.params["perturbationFactor"],
|
1087 |
annealing=self.params["annealing"],
|
|
|
1088 |
)
|
1089 |
|
1090 |
np_dtype = {16: np.float16, 32: np.float32, 64: np.float64}[
|
@@ -1106,7 +1146,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
1106 |
|
1107 |
cprocs = 0 if multithreading else procs
|
1108 |
|
1109 |
-
self.raw_julia_output = Main.EquationSearch(
|
1110 |
Main.X,
|
1111 |
Main.y,
|
1112 |
weights=Main.weights,
|
@@ -1119,6 +1159,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
1119 |
options=options,
|
1120 |
numprocs=int(cprocs),
|
1121 |
multithreading=bool(multithreading),
|
|
|
1122 |
)
|
1123 |
|
1124 |
self.variable_names = variable_names
|
|
|
12 |
import warnings
|
13 |
from multiprocessing import cpu_count
|
14 |
from sklearn.base import BaseEstimator, RegressorMixin
|
15 |
+
from collections import OrderedDict
|
16 |
+
from hashlib import sha256
|
17 |
|
18 |
is_julia_warning_silenced = False
|
19 |
|
|
|
322 |
SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
|
323 |
|
324 |
[compat]
|
325 |
+
SymbolicRegression = "0.7.3"
|
326 |
julia = "1.5"
|
327 |
"""
|
328 |
|
|
|
638 |
|
639 |
# Stored equations:
|
640 |
self.equations = None
|
641 |
+
self.params_hash = None
|
642 |
+
self.raw_julia_state = None
|
643 |
|
644 |
self.multioutput = None
|
|
|
645 |
self.equation_file = equation_file
|
646 |
self.n_features = None
|
647 |
self.extra_sympy_mappings = extra_sympy_mappings
|
|
|
657 |
self.surface_parameters = [
|
658 |
"model_selection",
|
659 |
"multioutput",
|
|
|
660 |
"equation_file",
|
661 |
"n_features",
|
662 |
"extra_sympy_mappings",
|
|
|
729 |
else:
|
730 |
self.params[key] = value
|
731 |
|
|
|
732 |
return self
|
733 |
|
734 |
def get_params(self, deep=True):
|
|
|
859 |
return [eq["torch_format"] for eq in best]
|
860 |
return best["torch_format"]
|
861 |
|
862 |
+
def reset(self):
|
863 |
+
"""Reset the search state."""
|
864 |
+
self.equations = None
|
865 |
+
self.params_hash = None
|
866 |
+
self.raw_julia_state = None
|
867 |
+
|
868 |
def _run(self, X, y, weights, variable_names):
|
869 |
global already_ran
|
870 |
global Main
|
|
|
1053 |
float(weightDoNothing),
|
1054 |
]
|
1055 |
|
1056 |
+
params_to_hash = {
|
1057 |
+
**{k: self.__getattribute__(k) for k in self.surface_parameters},
|
1058 |
+
**self.params,
|
1059 |
+
}
|
1060 |
+
params_excluded_from_hash = [
|
1061 |
+
"niterations",
|
1062 |
+
]
|
1063 |
+
# Delete these^ from params_to_hash:
|
1064 |
+
params_to_hash = {
|
1065 |
+
k: v
|
1066 |
+
for k, v in params_to_hash.items()
|
1067 |
+
if k not in params_excluded_from_hash
|
1068 |
+
}
|
1069 |
+
|
1070 |
+
# Sort params_to_hash by key:
|
1071 |
+
params_to_hash = OrderedDict(sorted(params_to_hash.items()))
|
1072 |
+
# Hash all parameters:
|
1073 |
+
cur_hash = sha256(str(params_to_hash).encode()).hexdigest()
|
1074 |
+
|
1075 |
+
if self.params_hash is not None:
|
1076 |
+
if cur_hash != self.params_hash:
|
1077 |
+
warnings.warn(
|
1078 |
+
"Warning: PySR options have changed since the last run. "
|
1079 |
+
"This is experimental and may not work. "
|
1080 |
+
"For example, if the operators change, or even their order,"
|
1081 |
+
" the saved equations will be in the wrong format."
|
1082 |
+
"\n\n"
|
1083 |
+
"To reset the search state, run `.reset()`. "
|
1084 |
+
)
|
1085 |
+
|
1086 |
+
self.params_hash = cur_hash
|
1087 |
+
|
1088 |
options = Main.Options(
|
1089 |
binary_operators=Main.eval(str(tuple(binary_operators)).replace("'", "")),
|
1090 |
unary_operators=Main.eval(str(tuple(unary_operators)).replace("'", "")),
|
|
|
1124 |
optimizer_iterations=self.params["optimizer_iterations"],
|
1125 |
perturbationFactor=self.params["perturbationFactor"],
|
1126 |
annealing=self.params["annealing"],
|
1127 |
+
stateReturn=True, # Required for state saving.
|
1128 |
)
|
1129 |
|
1130 |
np_dtype = {16: np.float16, 32: np.float32, 64: np.float64}[
|
|
|
1146 |
|
1147 |
cprocs = 0 if multithreading else procs
|
1148 |
|
1149 |
+
self.raw_julia_state = Main.EquationSearch(
|
1150 |
Main.X,
|
1151 |
Main.y,
|
1152 |
weights=Main.weights,
|
|
|
1159 |
options=options,
|
1160 |
numprocs=int(cprocs),
|
1161 |
multithreading=bool(multithreading),
|
1162 |
+
saved_state=self.raw_julia_state,
|
1163 |
)
|
1164 |
|
1165 |
self.variable_names = variable_names
|
setup.py
CHANGED
@@ -8,14 +8,14 @@ except FileNotFoundError:
|
|
8 |
|
9 |
setuptools.setup(
|
10 |
name="pysr",
|
11 |
-
version="0.7.0",
|
12 |
author="Miles Cranmer",
|
13 |
author_email="[email protected]",
|
14 |
description="Simple and efficient symbolic regression",
|
15 |
long_description=long_description,
|
16 |
long_description_content_type="text/markdown",
|
17 |
url="https://github.com/MilesCranmer/pysr",
|
18 |
-
install_requires=["julia", "numpy", "pandas", "sympy", "scikit-learn"],
|
19 |
packages=setuptools.find_packages(),
|
20 |
package_data={"pysr": ["../Project.toml", "../datasets/*"]},
|
21 |
include_package_data=False,
|
|
|
8 |
|
9 |
setuptools.setup(
|
10 |
name="pysr",
|
11 |
+
version="0.7.0-1",
|
12 |
author="Miles Cranmer",
|
13 |
author_email="[email protected]",
|
14 |
description="Simple and efficient symbolic regression",
|
15 |
long_description=long_description,
|
16 |
long_description_content_type="text/markdown",
|
17 |
url="https://github.com/MilesCranmer/pysr",
|
18 |
+
install_requires=["julia>=0.5.7", "numpy", "pandas", "sympy", "scikit-learn"],
|
19 |
packages=setuptools.find_packages(),
|
20 |
package_data={"pysr": ["../Project.toml", "../datasets/*"]},
|
21 |
include_package_data=False,
|
test/test.py
CHANGED
@@ -77,7 +77,7 @@ class TestPipeline(unittest.TestCase):
|
|
77 |
model.predict(self.X)[:, 1], self.X[:, 1] ** 2, decimal=4
|
78 |
)
|
79 |
|
80 |
-
def test_empty_operators_single_input(self):
|
81 |
X = np.random.randn(100, 1)
|
82 |
y = X[:, 0] + 3.0
|
83 |
regressor = PySRRegressor(
|
@@ -94,6 +94,13 @@ class TestPipeline(unittest.TestCase):
|
|
94 |
self.assertLessEqual(regressor.equations.iloc[-1]["loss"], 1e-4)
|
95 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
# Tweak model selection:
|
98 |
regressor.set_params(model_selection="best")
|
99 |
self.assertEqual(regressor.get_params()["model_selection"], "best")
|
|
|
77 |
model.predict(self.X)[:, 1], self.X[:, 1] ** 2, decimal=4
|
78 |
)
|
79 |
|
80 |
+
def test_empty_operators_single_input_multirun(self):
|
81 |
X = np.random.randn(100, 1)
|
82 |
y = X[:, 0] + 3.0
|
83 |
regressor = PySRRegressor(
|
|
|
94 |
self.assertLessEqual(regressor.equations.iloc[-1]["loss"], 1e-4)
|
95 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
96 |
|
97 |
+
# Test if repeated fit works:
|
98 |
+
regressor.set_params(niterations=0)
|
99 |
+
regressor.fit(X, y)
|
100 |
+
|
101 |
+
self.assertLessEqual(regressor.equations.iloc[-1]["loss"], 1e-4)
|
102 |
+
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
103 |
+
|
104 |
# Tweak model selection:
|
105 |
regressor.set_params(model_selection="best")
|
106 |
self.assertEqual(regressor.get_params()["model_selection"], "best")
|