Spaces:
Sleeping
Sleeping
MilesCranmer
committed on
Commit
•
f068a46
1
Parent(s):
2309acf
New default arguments for 0.6.0
Browse files - pysr/sr.py +15 -18
pysr/sr.py
CHANGED
@@ -63,11 +63,11 @@ def pysr(X=None, y=None, weights=None,
|
|
63 |
unary_operators=None,
|
64 |
procs=4,
|
65 |
loss='L2DistLoss()',
|
66 |
-
populations=None,
|
67 |
niterations=100,
|
68 |
ncyclesperiteration=300,
|
69 |
alpha=0.1,
|
70 |
-
annealing=
|
71 |
fractionReplaced=0.10,
|
72 |
fractionReplacedHof=0.10,
|
73 |
npop=1000,
|
@@ -90,7 +90,7 @@ def pysr(X=None, y=None, weights=None,
|
|
90 |
equation_file=None,
|
91 |
test='simple1',
|
92 |
verbosity=1e9,
|
93 |
-
progress=
|
94 |
maxsize=20,
|
95 |
fast_cycle=False,
|
96 |
maxdepth=None,
|
@@ -100,7 +100,7 @@ def pysr(X=None, y=None, weights=None,
|
|
100 |
select_k_features=None,
|
101 |
warmupMaxsizeBy=0.0,
|
102 |
constraints=None,
|
103 |
-
useFrequency=
|
104 |
tempdir=None,
|
105 |
delete_tempfiles=True,
|
106 |
julia_optimization=3,
|
@@ -109,12 +109,10 @@ def pysr(X=None, y=None, weights=None,
|
|
109 |
update=True,
|
110 |
temp_equation_file=False,
|
111 |
output_jax_format=False,
|
112 |
-
|
113 |
-
nrestarts=None,
|
114 |
-
optimizer_algorithm="NelderMead",
|
115 |
optimizer_nrestarts=3,
|
116 |
-
optimize_probability=0
|
117 |
-
optimizer_iterations=
|
118 |
):
|
119 |
"""Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
|
120 |
Note: most default parameters have been tuned over several example
|
@@ -128,9 +126,9 @@ def pysr(X=None, y=None, weights=None,
|
|
128 |
:param weights: np.ndarray, 1D array. Each row is how to weight the
|
129 |
mean-square-error loss on weights.
|
130 |
:param binary_operators: list, List of strings giving the binary operators
|
131 |
-
in Julia's Base. Default is ["
|
132 |
:param unary_operators: list, Same but for operators taking a single scalar.
|
133 |
-
Default is [
|
134 |
:param procs: int, Number of processes (=number of populations running).
|
135 |
:param loss: str, String of Julia code specifying the loss function.
|
136 |
Can either be a loss from LossFunctions.jl, or your own
|
@@ -144,7 +142,7 @@ def pysr(X=None, y=None, weights=None,
|
|
144 |
Classification: `ZeroOneLoss()`, `PerceptronLoss()`, `L1HingeLoss()`,
|
145 |
`SmoothedL1HingeLoss(γ)`, `ModifiedHuberLoss()`, `L2MarginLoss()`,
|
146 |
`ExpLoss()`, `SigmoidLoss()`, `DWDMarginLoss(q)`.
|
147 |
-
:param populations: int, Number of populations running
|
148 |
:param niterations: int, Number of iterations of the algorithm to run. The best
|
149 |
equations are printed, and migrate between populations, at the
|
150 |
end of each.
|
@@ -163,7 +161,6 @@ def pysr(X=None, y=None, weights=None,
|
|
163 |
:param shouldOptimizeConstants: bool, Whether to numerically optimize
|
164 |
constants (Nelder-Mead/Newton) at the end of each iteration.
|
165 |
:param topn: int, How many top individuals migrate from each population.
|
166 |
-
:param nrestarts: int, Number of times to restart the constant optimizer
|
167 |
:param perturbationFactor: float, Constants are perturbed by a max
|
168 |
factor of (perturbationFactor*T + 1). Either multiplied by this
|
169 |
or divided by this.
|
@@ -232,9 +229,9 @@ def pysr(X=None, y=None, weights=None,
|
|
232 |
|
233 |
"""
|
234 |
if binary_operators is None:
|
235 |
-
binary_operators =
|
236 |
if unary_operators is None:
|
237 |
-
unary_operators = [
|
238 |
if extra_sympy_mappings is None:
|
239 |
extra_sympy_mappings = {}
|
240 |
if variable_names is None:
|
@@ -242,7 +239,6 @@ def pysr(X=None, y=None, weights=None,
|
|
242 |
if constraints is None:
|
243 |
constraints = {}
|
244 |
|
245 |
-
assert warmupMaxsize == None, "warmupMaxsize is deprecated. Use warmupMaxsizeBy and give a fraction of time."
|
246 |
if nrestarts != None:
|
247 |
optimizer_nrestarts = nrestarts
|
248 |
|
@@ -265,6 +261,9 @@ def pysr(X=None, y=None, weights=None,
|
|
265 |
if len(X) > 10000 and not batching:
|
266 |
warnings.warn("Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://pysr.readthedocs.io/en/latest/docs/options/#batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed.")
|
267 |
|
|
|
|
|
|
|
268 |
X, variable_names = _handle_feature_selection(
|
269 |
X, select_k_features,
|
270 |
use_custom_variable_names, variable_names, y
|
@@ -272,8 +271,6 @@ def pysr(X=None, y=None, weights=None,
|
|
272 |
|
273 |
if maxdepth is None:
|
274 |
maxdepth = maxsize
|
275 |
-
if populations is None:
|
276 |
-
populations = procs
|
277 |
if isinstance(binary_operators, str):
|
278 |
binary_operators = [binary_operators]
|
279 |
if isinstance(unary_operators, str):
|
|
|
63 |
unary_operators=None,
|
64 |
procs=4,
|
65 |
loss='L2DistLoss()',
|
66 |
+
populations=20,
|
67 |
niterations=100,
|
68 |
ncyclesperiteration=300,
|
69 |
alpha=0.1,
|
70 |
+
annealing=False,
|
71 |
fractionReplaced=0.10,
|
72 |
fractionReplacedHof=0.10,
|
73 |
npop=1000,
|
|
|
90 |
equation_file=None,
|
91 |
test='simple1',
|
92 |
verbosity=1e9,
|
93 |
+
progress=True,
|
94 |
maxsize=20,
|
95 |
fast_cycle=False,
|
96 |
maxdepth=None,
|
|
|
100 |
select_k_features=None,
|
101 |
warmupMaxsizeBy=0.0,
|
102 |
constraints=None,
|
103 |
+
useFrequency=True,
|
104 |
tempdir=None,
|
105 |
delete_tempfiles=True,
|
106 |
julia_optimization=3,
|
|
|
109 |
update=True,
|
110 |
temp_equation_file=False,
|
111 |
output_jax_format=False,
|
112 |
+
optimizer_algorithm="BFGS",
|
|
|
|
|
113 |
optimizer_nrestarts=3,
|
114 |
+
optimize_probability=1.0,
|
115 |
+
optimizer_iterations=10,
|
116 |
):
|
117 |
"""Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
|
118 |
Note: most default parameters have been tuned over several example
|
|
|
126 |
:param weights: np.ndarray, 1D array. Each row is how to weight the
|
127 |
mean-square-error loss on weights.
|
128 |
:param binary_operators: list, List of strings giving the binary operators
|
129 |
+
in Julia's Base. Default is ["+", "-", "*", "/"].
|
130 |
:param unary_operators: list, Same but for operators taking a single scalar.
|
131 |
+
Default is [].
|
132 |
:param procs: int, Number of processes (=number of populations running).
|
133 |
:param loss: str, String of Julia code specifying the loss function.
|
134 |
Can either be a loss from LossFunctions.jl, or your own
|
|
|
142 |
Classification: `ZeroOneLoss()`, `PerceptronLoss()`, `L1HingeLoss()`,
|
143 |
`SmoothedL1HingeLoss(γ)`, `ModifiedHuberLoss()`, `L2MarginLoss()`,
|
144 |
`ExpLoss()`, `SigmoidLoss()`, `DWDMarginLoss(q)`.
|
145 |
+
:param populations: int, Number of populations running.
|
146 |
:param niterations: int, Number of iterations of the algorithm to run. The best
|
147 |
equations are printed, and migrate between populations, at the
|
148 |
end of each.
|
|
|
161 |
:param shouldOptimizeConstants: bool, Whether to numerically optimize
|
162 |
constants (Nelder-Mead/Newton) at the end of each iteration.
|
163 |
:param topn: int, How many top individuals migrate from each population.
|
|
|
164 |
:param perturbationFactor: float, Constants are perturbed by a max
|
165 |
factor of (perturbationFactor*T + 1). Either multiplied by this
|
166 |
or divided by this.
|
|
|
229 |
|
230 |
"""
|
231 |
if binary_operators is None:
|
232 |
+
binary_operators = '+ * - /'.split(' ')
|
233 |
if unary_operators is None:
|
234 |
+
unary_operators = []
|
235 |
if extra_sympy_mappings is None:
|
236 |
extra_sympy_mappings = {}
|
237 |
if variable_names is None:
|
|
|
239 |
if constraints is None:
|
240 |
constraints = {}
|
241 |
|
|
|
242 |
if nrestarts != None:
|
243 |
optimizer_nrestarts = nrestarts
|
244 |
|
|
|
261 |
if len(X) > 10000 and not batching:
|
262 |
warnings.warn("Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://pysr.readthedocs.io/en/latest/docs/options/#batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed.")
|
263 |
|
264 |
+
if maxsize > 40:
|
265 |
+
warnings.warn("Note: Using a large maxsize for the equation search will be slow and use significant memory. You should consider turning `useFrequency` to False, and perhaps use `warmupMaxsizeBy`.")
|
266 |
+
|
267 |
X, variable_names = _handle_feature_selection(
|
268 |
X, select_k_features,
|
269 |
use_custom_variable_names, variable_names, y
|
|
|
271 |
|
272 |
if maxdepth is None:
|
273 |
maxdepth = maxsize
|
|
|
|
|
274 |
if isinstance(binary_operators, str):
|
275 |
binary_operators = [binary_operators]
|
276 |
if isinstance(unary_operators, str):
|