Spaces:
Running
Running
MilesCranmer
committed on
Commit
·
c915ce2
1
Parent(s):
9ce590d
Fix tabs in docstring
Browse files- pysr/sr.py +60 -61
pysr/sr.py
CHANGED
@@ -148,119 +148,118 @@ def pysr(X, y, weights=None,
|
|
148 |
`binary_operators`, `unary_operators` to your requirements.
|
149 |
|
150 |
:param X: np.ndarray or pandas.DataFrame, 2D array. Rows are examples,
|
151 |
-
|
152 |
-
|
153 |
:param y: np.ndarray, 1D array (rows are examples) or 2D array (rows
|
154 |
-
|
155 |
-
|
156 |
:param weights: np.ndarray, same shape as y. Each element is how to
|
157 |
-
|
158 |
-
|
159 |
:param binary_operators: list, List of strings giving the binary operators
|
160 |
-
|
161 |
:param unary_operators: list, Same but for operators taking a single scalar.
|
162 |
-
|
163 |
:param procs: int, Number of processes (=number of populations running).
|
164 |
:param loss: str, String of Julia code specifying the loss function.
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
:param populations: int, Number of populations running.
|
177 |
:param niterations: int, Number of iterations of the algorithm to run. The best
|
178 |
-
|
179 |
-
|
180 |
:param ncyclesperiteration: int, Number of total mutations to run, per 10
|
181 |
-
|
182 |
:param alpha: float, Initial temperature.
|
183 |
:param annealing: bool, Whether to use annealing. You should (and it is default).
|
184 |
:param fractionReplaced: float, How much of population to replace with migrating
|
185 |
-
|
186 |
:param fractionReplacedHof: float, How much of population to replace with migrating
|
187 |
-
|
188 |
:param npop: int, Number of individuals in each population
|
189 |
:param parsimony: float, Multiplicative factor for how much to punish complexity.
|
190 |
:param migration: bool, Whether to migrate.
|
191 |
:param hofMigration: bool, Whether to have the hall of fame migrate.
|
192 |
:param shouldOptimizeConstants: bool, Whether to numerically optimize
|
193 |
-
|
194 |
:param topn: int, How many top individuals migrate from each population.
|
195 |
:param perturbationFactor: float, Constants are perturbed by a max
|
196 |
-
|
197 |
-
|
198 |
:param weightAddNode: float, Relative likelihood for mutation to add a node
|
199 |
:param weightInsertNode: float, Relative likelihood for mutation to insert a node
|
200 |
:param weightDeleteNode: float, Relative likelihood for mutation to delete a node
|
201 |
:param weightDoNothing: float, Relative likelihood for mutation to leave the individual
|
202 |
:param weightMutateConstant: float, Relative likelihood for mutation to change
|
203 |
-
|
204 |
:param weightMutateOperator: float, Relative likelihood for mutation to swap
|
205 |
-
|
206 |
:param weightRandomize: float, Relative likelihood for mutation to completely
|
207 |
-
|
208 |
:param weightSimplify: float, Relative likelihood for mutation to simplify
|
209 |
-
|
210 |
:param timeout: float, Time in seconds to timeout search
|
211 |
:param equation_file: str, Where to save the files (.csv separated by |)
|
212 |
:param verbosity: int, What verbosity level to use. 0 means minimal print statements.
|
213 |
:param progress: bool, Whether to use a progress bar instead of printing to stdout.
|
214 |
:param maxsize: int, Max size of an equation.
|
215 |
:param maxdepth: int, Max depth of an equation. You can use both maxsize and maxdepth.
|
216 |
-
|
217 |
:param fast_cycle: bool, (experimental) - batch over population subsamples. This
|
218 |
-
|
219 |
-
|
220 |
:param variable_names: list, a list of names for the variables, other
|
221 |
-
|
222 |
:param batching: bool, whether to compare population members on small batches
|
223 |
-
|
224 |
-
|
225 |
:param batchSize: int, the amount of data to use if doing batching.
|
226 |
:param select_k_features: (None, int), whether to run feature selection in
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
:param warmupMaxsizeBy: float, whether to slowly increase max size from
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
:param constraints: dict of int (unary) or 2-tuples (binary),
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
:param useFrequency: bool, whether to measure the frequency of complexities,
|
240 |
-
|
241 |
-
|
242 |
:param julia_optimization: int, Optimization level (0, 1, 2, 3)
|
243 |
:param tempdir: str or None, directory for the temporary files
|
244 |
:param delete_tempfiles: bool, whether to delete the temporary files after finishing
|
245 |
:param julia_project: str or None, a Julia environment location containing
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
:param user_input: Whether to ask for user input or not for installing (to
|
250 |
-
|
251 |
:param update: Whether to automatically update Julia packages.
|
252 |
:param temp_equation_file: Whether to put the hall of fame file in
|
253 |
-
|
254 |
-
|
255 |
:param output_jax_format: Whether to create a 'jax_format' column in the output,
|
256 |
-
|
257 |
:param output_torch_format: Whether to create a 'torch_format' column in the output,
|
258 |
-
|
259 |
:returns: pd.DataFrame or list, Results dataframe,
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
"""
|
265 |
if binary_operators is None:
|
266 |
binary_operators = '+ * - /'.split(' ')
|
|
|
148 |
`binary_operators`, `unary_operators` to your requirements.
|
149 |
|
150 |
:param X: np.ndarray or pandas.DataFrame, 2D array. Rows are examples,
|
151 |
+
columns are features. If pandas DataFrame, the columns are used
|
152 |
+
for variable names (so make sure they don't contain spaces).
|
153 |
:param y: np.ndarray, 1D array (rows are examples) or 2D array (rows
|
154 |
+
are examples, columns are outputs). Putting in a 2D array will
|
155 |
+
trigger a search for equations for each feature of y.
|
156 |
:param weights: np.ndarray, same shape as y. Each element is how to
|
157 |
+
weight the mean-square-error loss for that particular element
|
158 |
+
of y.
|
159 |
:param binary_operators: list, List of strings giving the binary operators
|
160 |
+
in Julia's Base. Default is ["+", "-", "*", "/",].
|
161 |
:param unary_operators: list, Same but for operators taking a single scalar.
|
162 |
+
Default is [].
|
163 |
:param procs: int, Number of processes (=number of populations running).
|
164 |
:param loss: str, String of Julia code specifying the loss function.
|
165 |
+
Can either be a loss from LossFunctions.jl, or your own
|
166 |
+
loss written as a function. Examples of custom written losses
|
167 |
+
include: `myloss(x, y) = abs(x-y)` for non-weighted, or
|
168 |
+
`myloss(x, y, w) = w*abs(x-y)` for weighted.
|
169 |
+
Among the included losses, these are as follows. Regression:
|
170 |
+
`LPDistLoss{P}()`, `L1DistLoss()`, `L2DistLoss()` (mean square),
|
171 |
+
`LogitDistLoss()`, `HuberLoss(d)`, `L1EpsilonInsLoss(ϵ)`,
|
172 |
+
`L2EpsilonInsLoss(ϵ)`, `PeriodicLoss(c)`, `QuantileLoss(τ)`.
|
173 |
+
Classification: `ZeroOneLoss()`, `PerceptronLoss()`, `L1HingeLoss()`,
|
174 |
+
`SmoothedL1HingeLoss(γ)`, `ModifiedHuberLoss()`, `L2MarginLoss()`,
|
175 |
+
`ExpLoss()`, `SigmoidLoss()`, `DWDMarginLoss(q)`.
|
176 |
:param populations: int, Number of populations running.
|
177 |
:param niterations: int, Number of iterations of the algorithm to run. The best
|
178 |
+
equations are printed, and migrate between populations, at the
|
179 |
+
end of each.
|
180 |
:param ncyclesperiteration: int, Number of total mutations to run, per 10
|
181 |
+
samples of the population, per iteration.
|
182 |
:param alpha: float, Initial temperature.
|
183 |
:param annealing: bool, Whether to use annealing. You should (and it is default).
|
184 |
:param fractionReplaced: float, How much of population to replace with migrating
|
185 |
+
equations from other populations.
|
186 |
:param fractionReplacedHof: float, How much of population to replace with migrating
|
187 |
+
equations from hall of fame.
|
188 |
:param npop: int, Number of individuals in each population
|
189 |
:param parsimony: float, Multiplicative factor for how much to punish complexity.
|
190 |
:param migration: bool, Whether to migrate.
|
191 |
:param hofMigration: bool, Whether to have the hall of fame migrate.
|
192 |
:param shouldOptimizeConstants: bool, Whether to numerically optimize
|
193 |
+
constants (Nelder-Mead/Newton) at the end of each iteration.
|
194 |
:param topn: int, How many top individuals migrate from each population.
|
195 |
:param perturbationFactor: float, Constants are perturbed by a max
|
196 |
+
factor of (perturbationFactor*T + 1). Either multiplied by this
|
197 |
+
or divided by this.
|
198 |
:param weightAddNode: float, Relative likelihood for mutation to add a node
|
199 |
:param weightInsertNode: float, Relative likelihood for mutation to insert a node
|
200 |
:param weightDeleteNode: float, Relative likelihood for mutation to delete a node
|
201 |
:param weightDoNothing: float, Relative likelihood for mutation to leave the individual
|
202 |
:param weightMutateConstant: float, Relative likelihood for mutation to change
|
203 |
+
the constant slightly in a random direction.
|
204 |
:param weightMutateOperator: float, Relative likelihood for mutation to swap
|
205 |
+
an operator.
|
206 |
:param weightRandomize: float, Relative likelihood for mutation to completely
|
207 |
+
delete and then randomly generate the equation
|
208 |
:param weightSimplify: float, Relative likelihood for mutation to simplify
|
209 |
+
constant parts by evaluation
|
210 |
:param timeout: float, Time in seconds to timeout search
|
211 |
:param equation_file: str, Where to save the files (.csv separated by |)
|
212 |
:param verbosity: int, What verbosity level to use. 0 means minimal print statements.
|
213 |
:param progress: bool, Whether to use a progress bar instead of printing to stdout.
|
214 |
:param maxsize: int, Max size of an equation.
|
215 |
:param maxdepth: int, Max depth of an equation. You can use both maxsize and maxdepth.
|
216 |
+
maxdepth is by default set to = maxsize, which means that it is redundant.
|
217 |
:param fast_cycle: bool, (experimental) - batch over population subsamples. This
|
218 |
+
is a slightly different algorithm than regularized evolution, but does cycles
|
219 |
+
15% faster. May be algorithmically less efficient.
|
220 |
:param variable_names: list, a list of names for the variables, other
|
221 |
+
than "x0", "x1", etc.
|
222 |
:param batching: bool, whether to compare population members on small batches
|
223 |
+
during evolution. Still uses full dataset for comparing against
|
224 |
+
hall of fame.
|
225 |
:param batchSize: int, the amount of data to use if doing batching.
|
226 |
:param select_k_features: (None, int), whether to run feature selection in
|
227 |
+
Python using random forests, before passing to the symbolic regression
|
228 |
+
code. None means no feature selection; an int means select that many
|
229 |
+
features.
|
230 |
:param warmupMaxsizeBy: float, whether to slowly increase max size from
|
231 |
+
a small number up to the maxsize (if greater than 0).
|
232 |
+
If greater than 0, says the fraction of training time at which
|
233 |
+
the current maxsize will reach the user-passed maxsize.
|
234 |
:param constraints: dict of int (unary) or 2-tuples (binary),
|
235 |
+
this enforces maxsize constraints on the individual
|
236 |
+
arguments of operators. E.g., `'pow': (-1, 1)`
|
237 |
+
says that power laws can have any complexity left argument, but only
|
238 |
+
1 complexity exponent. Use this to force more interpretable solutions.
|
239 |
:param useFrequency: bool, whether to measure the frequency of complexities,
|
240 |
+
and use that instead of parsimony to explore equation space. Will
|
241 |
+
naturally find equations of all complexities.
|
242 |
:param julia_optimization: int, Optimization level (0, 1, 2, 3)
|
243 |
:param tempdir: str or None, directory for the temporary files
|
244 |
:param delete_tempfiles: bool, whether to delete the temporary files after finishing
|
245 |
:param julia_project: str or None, a Julia environment location containing
|
246 |
+
a Project.toml (and potentially the source code for SymbolicRegression.jl).
|
247 |
+
Default gives the Python package directory, where a Project.toml file
|
248 |
+
should be present from the install.
|
249 |
:param user_input: Whether to ask for user input or not for installing (to
|
250 |
+
be used for automated scripts). Will choose to install when asked.
|
251 |
:param update: Whether to automatically update Julia packages.
|
252 |
:param temp_equation_file: Whether to put the hall of fame file in
|
253 |
+
the temp directory. Deletion is then controlled with the
|
254 |
+
delete_tempfiles argument.
|
255 |
:param output_jax_format: Whether to create a 'jax_format' column in the output,
|
256 |
+
containing jax-callable functions and the default parameters in a jax array.
|
257 |
:param output_torch_format: Whether to create a 'torch_format' column in the output,
|
258 |
+
containing a torch module with trainable parameters.
|
259 |
:returns: pd.DataFrame or list, Results dataframe,
|
260 |
+
giving complexity, MSE, and equations (as strings), as well as functional
|
261 |
+
forms. If list, each element corresponds to a dataframe of equations
|
262 |
+
for each output.
|
|
|
263 |
"""
|
264 |
if binary_operators is None:
|
265 |
binary_operators = '+ * - /'.split(' ')
|