MilesCranmer committed
Commit f068a46
Parent: 2309acf

New default arguments for 0.6.0

Files changed (1):
  pysr/sr.py  (+15 -18)
pysr/sr.py CHANGED

@@ -63,11 +63,11 @@ def pysr(X=None, y=None, weights=None,
         unary_operators=None,
         procs=4,
         loss='L2DistLoss()',
-        populations=None,
+        populations=20,
         niterations=100,
         ncyclesperiteration=300,
         alpha=0.1,
-        annealing=True,
+        annealing=False,
         fractionReplaced=0.10,
         fractionReplacedHof=0.10,
         npop=1000,
@@ -90,7 +90,7 @@ def pysr(X=None, y=None, weights=None,
         equation_file=None,
         test='simple1',
         verbosity=1e9,
-        progress=False,
+        progress=True,
         maxsize=20,
         fast_cycle=False,
         maxdepth=None,
@@ -100,7 +100,7 @@ def pysr(X=None, y=None, weights=None,
         select_k_features=None,
         warmupMaxsizeBy=0.0,
         constraints=None,
-        useFrequency=False,
+        useFrequency=True,
         tempdir=None,
         delete_tempfiles=True,
         julia_optimization=3,
@@ -109,12 +109,10 @@ def pysr(X=None, y=None, weights=None,
         update=True,
         temp_equation_file=False,
         output_jax_format=False,
-        warmupMaxsize=None, #Deprecated
-        nrestarts=None,
-        optimizer_algorithm="NelderMead",
+        optimizer_algorithm="BFGS",
         optimizer_nrestarts=3,
-        optimize_probability=0.1,
-        optimizer_iterations=100,
+        optimize_probability=1.0,
+        optimizer_iterations=10,
         ):
     """Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
     Note: most default parameters have been tuned over several example
@@ -128,9 +126,9 @@ def pysr(X=None, y=None, weights=None,
     :param weights: np.ndarray, 1D array. Each row is how to weight the
         mean-square-error loss on weights.
     :param binary_operators: list, List of strings giving the binary operators
-        in Julia's Base. Default is ["plus", "mult"].
+        in Julia's Base. Default is ["+", "-", "*", "/",].
     :param unary_operators: list, Same but for operators taking a single scalar.
-        Default is ["cos", "exp", "sin"].
+        Default is [].
     :param procs: int, Number of processes (=number of populations running).
     :param loss: str, String of Julia code specifying the loss function.
         Can either be a loss from LossFunctions.jl, or your own
@@ -144,7 +142,7 @@ def pysr(X=None, y=None, weights=None,
         Classification: `ZeroOneLoss()`, `PerceptronLoss()`, `L1HingeLoss()`,
         `SmoothedL1HingeLoss(γ)`, `ModifiedHuberLoss()`, `L2MarginLoss()`,
         `ExpLoss()`, `SigmoidLoss()`, `DWDMarginLoss(q)`.
-    :param populations: int, Number of populations running; by default=procs.
+    :param populations: int, Number of populations running.
     :param niterations: int, Number of iterations of the algorithm to run. The best
         equations are printed, and migrate between populations, at the
         end of each.
@@ -163,7 +161,6 @@ def pysr(X=None, y=None, weights=None,
     :param shouldOptimizeConstants: bool, Whether to numerically optimize
         constants (Nelder-Mead/Newton) at the end of each iteration.
     :param topn: int, How many top individuals migrate from each population.
-    :param nrestarts: int, Number of times to restart the constant optimizer
     :param perturbationFactor: float, Constants are perturbed by a max
         factor of (perturbationFactor*T + 1). Either multiplied by this
         or divided by this.
@@ -232,9 +229,9 @@ def pysr(X=None, y=None, weights=None,

     """
     if binary_operators is None:
-        binary_operators = ["plus", "mult"]
+        binary_operators = '+ * - /'.split(' ')
     if unary_operators is None:
-        unary_operators = ["cos", "exp", "sin"]
+        unary_operators = []
     if extra_sympy_mappings is None:
         extra_sympy_mappings = {}
     if variable_names is None:
@@ -242,7 +239,6 @@ def pysr(X=None, y=None, weights=None,
     if constraints is None:
         constraints = {}

-    assert warmupMaxsize == None, "warmupMaxsize is deprecated. Use warmupMaxsizeBy and give a fraction of time."
     if nrestarts != None:
         optimizer_nrestarts = nrestarts

@@ -265,6 +261,9 @@ def pysr(X=None, y=None, weights=None,
     if len(X) > 10000 and not batching:
         warnings.warn("Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://pysr.readthedocs.io/en/latest/docs/options/#batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed.")

+    if maxsize > 40:
+        warnings.warn("Note: Using a large maxsize for the equation search will be slow and use significant memory. You should consider turning `useFrequency` to False, and perhaps use `warmupMaxsizeBy`.")
+
     X, variable_names = _handle_feature_selection(
         X, select_k_features,
         use_custom_variable_names, variable_names, y
@@ -272,8 +271,6 @@ def pysr(X=None, y=None, weights=None,

     if maxdepth is None:
         maxdepth = maxsize
-    if populations is None:
-        populations = procs
     if isinstance(binary_operators, str):
         binary_operators = [binary_operators]
     if isinstance(unary_operators, str):
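
For context, a minimal sketch of what these new 0.6.0 defaults mean for a caller. The toy dataset, the choice of `cos` as a unary operator, and the `best` helper are illustrative assumptions for this example, not part of the commit itself:

    import numpy as np
    from pysr import pysr, best  # `best` assumed available, as in PySR examples of this era

    # Hypothetical toy data: y = 2*cos(x3) + x0^2 - 2
    X = np.random.randn(100, 5)
    y = 2 * np.cos(X[:, 3]) + X[:, 0] ** 2 - 2

    # With the defaults from this commit, a bare call now searches over the
    # binary operators ["+", "-", "*", "/"] and no unary operators, runs 20
    # populations with annealing off and useFrequency on, shows a progress
    # bar, and BFGS-optimizes constants in every equation
    # (optimize_probability=1.0, optimizer_iterations=10).
    equations = pysr(X, y, unary_operators=["cos"])

    # Something close to the pre-0.6.0 behaviour can still be requested
    # explicitly by passing the old defaults back in:
    equations_old = pysr(
        X, y,
        binary_operators=["plus", "mult"],
        unary_operators=["cos", "exp", "sin"],
        populations=4,  # old default was populations=procs (procs=4)
        annealing=True,
        progress=False,
        useFrequency=False,
        optimizer_algorithm="NelderMead",
        optimize_probability=0.1,
        optimizer_iterations=100,
    )

    print(best(equations))

Note the new warning added by this commit: with a `maxsize` above 40, the search is flagged as slow and memory-hungry, and the suggested configuration for that regime is `useFrequency=False`, possibly combined with `warmupMaxsizeBy`.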