Spaces:
Sleeping
Sleeping
MilesCranmer
committed on
Commit
•
0cf6092
1
Parent(s):
90c6c4b
Remove not-rendered `:param` key
Browse files
- pysr/sr.py +32 -32
pysr/sr.py
CHANGED
@@ -144,7 +144,7 @@ def _maybe_create_inline_operators(
|
|
144 |
not function_name in extra_sympy_mappings
|
145 |
):
|
146 |
raise ValueError(
|
147 |
-
f"Custom function {function_name} is not defined in :param`extra_sympy_mappings`. "
|
148 |
"You can define it with, "
|
149 |
"e.g., `model.set_params(extra_sympy_mappings={'inv': lambda x: 1/x})`, where "
|
150 |
"`lambda x: 1/x` is a valid SymPy function defining the operator. "
|
@@ -246,7 +246,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
246 |
binary_operators : list[str], default=["+", "-", "*", "/"]
|
247 |
List of strings giving the binary operators in Julia's Base.
|
248 |
unary_operators : list[str], default=None
|
249 |
-
Same as
|
250 |
single scalar.
|
251 |
niterations : int, default=40
|
252 |
Number of iterations of the algorithm to run. The best
|
@@ -262,8 +262,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
262 |
maxsize : int, default=20
|
263 |
Max complexity of an equation.
|
264 |
maxdepth : int, default=None
|
265 |
-
Max depth of an equation. You can use both
|
266 |
-
|
267 |
warmup_maxsize_by : float, default=0.0
|
268 |
Whether to slowly increase max size from a small number up to
|
269 |
the maxsize (if greater than 0). If greater than 0, says the
|
@@ -331,7 +331,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
331 |
rather than just the simulated annealing.
|
332 |
alpha : float, default=0.1
|
333 |
Initial temperature for simulated annealing
|
334 |
-
(requires
|
335 |
annealing : bool, default=False
|
336 |
Whether to use annealing.
|
337 |
early_stop_condition : { float | str }, default=None
|
@@ -430,8 +430,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
430 |
deterministic : bool, default=False
|
431 |
Make a PySR search give the same result every run.
|
432 |
To use this, you must turn off parallelism
|
433 |
-
(with
|
434 |
-
and set
|
435 |
warm_start : bool, default=False
|
436 |
Tells fit to continue from where the last call to fit finished.
|
437 |
If false, each call to fit will be fresh, overwriting previous results.
|
@@ -439,14 +439,14 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
439 |
What verbosity level to use. 0 means minimal print statements.
|
440 |
update_verbosity : int, default=None
|
441 |
What verbosity level to use for package updates.
|
442 |
-
Will take value of
|
443 |
progress : bool, default=True
|
444 |
Whether to use a progress bar instead of printing to stdout.
|
445 |
equation_file : str, default=None
|
446 |
Where to save the files (.csv extension).
|
447 |
temp_equation_file : bool, default=False
|
448 |
Whether to put the hall of fame file in the temp directory.
|
449 |
-
Deletion is then controlled with the
|
450 |
parameter.
|
451 |
tempdir : str, default=None
|
452 |
directory for the temporary files.
|
@@ -467,19 +467,19 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
467 |
Whether to create a 'torch_format' column in the output,
|
468 |
containing a torch module with trainable parameters.
|
469 |
extra_sympy_mappings : dict[str, Callable], default=None
|
470 |
-
Provides mappings between custom
|
471 |
-
|
472 |
operators defined in sympy.
|
473 |
E.G if `unary_operators=["inv(x)=1/x"]`, then for the fitted
|
474 |
-
model to be export to sympy,
|
475 |
would be `{"inv": lambda x: 1/x}`.
|
476 |
extra_jax_mappings : dict[Callable, str], default=None
|
477 |
-
Similar to
|
478 |
to jax. The dictionary maps sympy functions to jax functions.
|
479 |
For example: `extra_jax_mappings={sympy.sin: "jnp.sin"}` maps
|
480 |
the `sympy.sin` function to the equivalent jax expression `jnp.sin`.
|
481 |
extra_torch_mappings : dict[Callable, Callable], default=None
|
482 |
-
The same as
|
483 |
to pytorch. Note that the dictionary keys should be callable
|
484 |
pytorch expressions.
|
485 |
For example: `extra_torch_mappings={sympy.sin: torch.sin}`
|
@@ -507,7 +507,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
507 |
Number of output dimensions.
|
508 |
selection_mask_ : list[int] of length `select_k_features`
|
509 |
List of indices for input features that are selected when
|
510 |
-
|
511 |
tempdir_ : Path
|
512 |
Path to the temporary equations directory.
|
513 |
equation_file_ : str
|
@@ -998,7 +998,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
998 |
----------
|
999 |
index : int | list[int], default=None
|
1000 |
If you wish to select a particular equation from `self.equations_`,
|
1001 |
-
give the row number here. This overrides the
|
1002 |
parameter. If there are multiple output features, then pass
|
1003 |
a list of indices with the order the same as the output feature.
|
1004 |
|
@@ -1037,8 +1037,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1037 |
"""
|
1038 |
Set the full pathname of the equation file.
|
1039 |
|
1040 |
-
This is performed using
|
1041 |
-
|
1042 |
"""
|
1043 |
# Cast tempdir string as a Path object
|
1044 |
self.tempdir_ = Path(tempfile.mkdtemp(dir=self.tempdir))
|
@@ -1136,8 +1136,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1136 |
parameter_value = [parameter_value]
|
1137 |
elif parameter == "batch_size" and parameter_value < 1:
|
1138 |
warnings.warn(
|
1139 |
-
"Given
|
1140 |
-
"
|
1141 |
)
|
1142 |
parameter_value = 1
|
1143 |
elif parameter == "progress" and not buffer_available:
|
@@ -1192,7 +1192,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1192 |
if variable_names:
|
1193 |
variable_names = None
|
1194 |
warnings.warn(
|
1195 |
-
"
|
1196 |
"Using DataFrame column names instead."
|
1197 |
)
|
1198 |
|
@@ -1259,16 +1259,16 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1259 |
-------
|
1260 |
X_transformed : ndarray of shape (n_samples, n_features)
|
1261 |
Transformed training data. n_samples will be equal to
|
1262 |
-
|
1263 |
-
and
|
1264 |
-
equal to
|
1265 |
-
|
1266 |
-
otherwise it will be equal to
|
1267 |
y_transformed : ndarray of shape (n_samples,) or (n_samples, n_outputs)
|
1268 |
Transformed target data. n_samples will be equal to
|
1269 |
-
|
1270 |
-
and
|
1271 |
-
equal to
|
1272 |
variable_names_transformed : list[str] of length n_features
|
1273 |
Names of each variable in the transformed dataset,
|
1274 |
`X_transformed`.
|
@@ -1579,7 +1579,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1579 |
in arbitrary ways.
|
1580 |
variable_names : list[str], default=None
|
1581 |
A list of names for the variables, rather than "x0", "x1", etc.
|
1582 |
-
If
|
1583 |
instead of `variable_names`. Cannot contain spaces or special
|
1584 |
characters. Avoid variable names which are also
|
1585 |
function names in `sympy`, such as "N".
|
@@ -1679,7 +1679,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1679 |
"""
|
1680 |
Update self.equations_ with any new options passed.
|
1681 |
|
1682 |
-
For example, updating
|
1683 |
will require a `.refresh()` to update the equations.
|
1684 |
|
1685 |
Parameters
|
@@ -1761,7 +1761,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1761 |
except Exception as error:
|
1762 |
raise ValueError(
|
1763 |
"Failed to evaluate the expression. "
|
1764 |
-
"If you are using a custom operator, make sure to define it in :param`extra_sympy_mappings`, "
|
1765 |
"e.g., `model.set_params(extra_sympy_mappings={'inv': lambda x: 1/x})`, where "
|
1766 |
"`lambda x: 1/x` is a valid SymPy function defining the operator. "
|
1767 |
"You can then run `model.refresh()` to re-load the expressions."
|
|
|
144 |
not function_name in extra_sympy_mappings
|
145 |
):
|
146 |
raise ValueError(
|
147 |
+
f"Custom function {function_name} is not defined in `extra_sympy_mappings`. "
|
148 |
"You can define it with, "
|
149 |
"e.g., `model.set_params(extra_sympy_mappings={'inv': lambda x: 1/x})`, where "
|
150 |
"`lambda x: 1/x` is a valid SymPy function defining the operator. "
|
|
|
246 |
binary_operators : list[str], default=["+", "-", "*", "/"]
|
247 |
List of strings giving the binary operators in Julia's Base.
|
248 |
unary_operators : list[str], default=None
|
249 |
+
Same as `binary_operators` but for operators taking a
|
250 |
single scalar.
|
251 |
niterations : int, default=40
|
252 |
Number of iterations of the algorithm to run. The best
|
|
|
262 |
maxsize : int, default=20
|
263 |
Max complexity of an equation.
|
264 |
maxdepth : int, default=None
|
265 |
+
Max depth of an equation. You can use both `maxsize` and
|
266 |
+
`maxdepth`. `maxdepth` is by default not used.
|
267 |
warmup_maxsize_by : float, default=0.0
|
268 |
Whether to slowly increase max size from a small number up to
|
269 |
the maxsize (if greater than 0). If greater than 0, says the
|
|
|
331 |
rather than just the simulated annealing.
|
332 |
alpha : float, default=0.1
|
333 |
Initial temperature for simulated annealing
|
334 |
+
(requires `annealing` to be `True`).
|
335 |
annealing : bool, default=False
|
336 |
Whether to use annealing.
|
337 |
early_stop_condition : { float | str }, default=None
|
|
|
430 |
deterministic : bool, default=False
|
431 |
Make a PySR search give the same result every run.
|
432 |
To use this, you must turn off parallelism
|
433 |
+
(with `procs`=0, `multithreading`=False),
|
434 |
+
and set `random_state` to a fixed seed.
|
435 |
warm_start : bool, default=False
|
436 |
Tells fit to continue from where the last call to fit finished.
|
437 |
If false, each call to fit will be fresh, overwriting previous results.
|
|
|
439 |
What verbosity level to use. 0 means minimal print statements.
|
440 |
update_verbosity : int, default=None
|
441 |
What verbosity level to use for package updates.
|
442 |
+
Will take value of `verbosity` if not given.
|
443 |
progress : bool, default=True
|
444 |
Whether to use a progress bar instead of printing to stdout.
|
445 |
equation_file : str, default=None
|
446 |
Where to save the files (.csv extension).
|
447 |
temp_equation_file : bool, default=False
|
448 |
Whether to put the hall of fame file in the temp directory.
|
449 |
+
Deletion is then controlled with the `delete_tempfiles`
|
450 |
parameter.
|
451 |
tempdir : str, default=None
|
452 |
directory for the temporary files.
|
|
|
467 |
Whether to create a 'torch_format' column in the output,
|
468 |
containing a torch module with trainable parameters.
|
469 |
extra_sympy_mappings : dict[str, Callable], default=None
|
470 |
+
Provides mappings between custom `binary_operators` or
|
471 |
+
`unary_operators` defined in julia strings, to those same
|
472 |
operators defined in sympy.
|
473 |
E.G if `unary_operators=["inv(x)=1/x"]`, then for the fitted
|
474 |
+
model to be export to sympy, `extra_sympy_mappings`
|
475 |
would be `{"inv": lambda x: 1/x}`.
|
476 |
extra_jax_mappings : dict[Callable, str], default=None
|
477 |
+
Similar to `extra_sympy_mappings` but for model export
|
478 |
to jax. The dictionary maps sympy functions to jax functions.
|
479 |
For example: `extra_jax_mappings={sympy.sin: "jnp.sin"}` maps
|
480 |
the `sympy.sin` function to the equivalent jax expression `jnp.sin`.
|
481 |
extra_torch_mappings : dict[Callable, Callable], default=None
|
482 |
+
The same as `extra_jax_mappings` but for model export
|
483 |
to pytorch. Note that the dictionary keys should be callable
|
484 |
pytorch expressions.
|
485 |
For example: `extra_torch_mappings={sympy.sin: torch.sin}`
|
|
|
507 |
Number of output dimensions.
|
508 |
selection_mask_ : list[int] of length `select_k_features`
|
509 |
List of indices for input features that are selected when
|
510 |
+
`select_k_features` is set.
|
511 |
tempdir_ : Path
|
512 |
Path to the temporary equations directory.
|
513 |
equation_file_ : str
|
|
|
998 |
----------
|
999 |
index : int | list[int], default=None
|
1000 |
If you wish to select a particular equation from `self.equations_`,
|
1001 |
+
give the row number here. This overrides the `model_selection`
|
1002 |
parameter. If there are multiple output features, then pass
|
1003 |
a list of indices with the order the same as the output feature.
|
1004 |
|
|
|
1037 |
"""
|
1038 |
Set the full pathname of the equation file.
|
1039 |
|
1040 |
+
This is performed using `tempdir` and
|
1041 |
+
`equation_file`.
|
1042 |
"""
|
1043 |
# Cast tempdir string as a Path object
|
1044 |
self.tempdir_ = Path(tempfile.mkdtemp(dir=self.tempdir))
|
|
|
1136 |
parameter_value = [parameter_value]
|
1137 |
elif parameter == "batch_size" and parameter_value < 1:
|
1138 |
warnings.warn(
|
1139 |
+
"Given `batch_size` must be greater than or equal to one. "
|
1140 |
+
"`batch_size` has been increased to equal one."
|
1141 |
)
|
1142 |
parameter_value = 1
|
1143 |
elif parameter == "progress" and not buffer_available:
|
|
|
1192 |
if variable_names:
|
1193 |
variable_names = None
|
1194 |
warnings.warn(
|
1195 |
+
"`variable_names` has been reset to `None` as `X` is a DataFrame. "
|
1196 |
"Using DataFrame column names instead."
|
1197 |
)
|
1198 |
|
|
|
1259 |
-------
|
1260 |
X_transformed : ndarray of shape (n_samples, n_features)
|
1261 |
Transformed training data. n_samples will be equal to
|
1262 |
+
`Xresampled.shape[0]` if `self.denoise` is `True`,
|
1263 |
+
and `Xresampled is not None`, otherwise it will be
|
1264 |
+
equal to `X.shape[0]`. n_features will be equal to
|
1265 |
+
`self.select_k_features` if `self.select_k_features is not None`,
|
1266 |
+
otherwise it will be equal to `X.shape[1]`
|
1267 |
y_transformed : ndarray of shape (n_samples,) or (n_samples, n_outputs)
|
1268 |
Transformed target data. n_samples will be equal to
|
1269 |
+
`Xresampled.shape[0]` if `self.denoise` is `True`,
|
1270 |
+
and `Xresampled is not None`, otherwise it will be
|
1271 |
+
equal to `X.shape[0]`.
|
1272 |
variable_names_transformed : list[str] of length n_features
|
1273 |
Names of each variable in the transformed dataset,
|
1274 |
`X_transformed`.
|
|
|
1579 |
in arbitrary ways.
|
1580 |
variable_names : list[str], default=None
|
1581 |
A list of names for the variables, rather than "x0", "x1", etc.
|
1582 |
+
If `X` is a pandas dataframe, the column names will be used
|
1583 |
instead of `variable_names`. Cannot contain spaces or special
|
1584 |
characters. Avoid variable names which are also
|
1585 |
function names in `sympy`, such as "N".
|
|
|
1679 |
"""
|
1680 |
Update self.equations_ with any new options passed.
|
1681 |
|
1682 |
+
For example, updating `extra_sympy_mappings`
|
1683 |
will require a `.refresh()` to update the equations.
|
1684 |
|
1685 |
Parameters
|
|
|
1761 |
except Exception as error:
|
1762 |
raise ValueError(
|
1763 |
"Failed to evaluate the expression. "
|
1764 |
+
"If you are using a custom operator, make sure to define it in `extra_sympy_mappings`, "
|
1765 |
"e.g., `model.set_params(extra_sympy_mappings={'inv': lambda x: 1/x})`, where "
|
1766 |
"`lambda x: 1/x` is a valid SymPy function defining the operator. "
|
1767 |
"You can then run `model.refresh()` to re-load the expressions."
|