diff --git "a/pysr/sr.py" "b/pysr/sr.py" --- "a/pysr/sr.py" +++ "b/pysr/sr.py" @@ -72,7 +72,7 @@ sympy_mappings = { def pysr(X, y, weights=None, **kwargs): # pragma: no cover warnings.warn( "Calling `pysr` is deprecated. Please use `model = PySRRegressor(**params); model.fit(X, y)` going forward.", - DeprecationWarning, + FutureWarning, ) model = PySRRegressor(**kwargs) model.fit(X, y, weights=weights) @@ -124,17 +124,6 @@ def _create_inline_operators(binary_operators, unary_operators): op_list[i] = function_name -def _handle_feature_selection(X, select_k_features, y, variable_names): - if select_k_features is not None: - selection = run_feature_selection(X, y, select_k_features) - print(f"Using features {[variable_names[i] for i in selection]}") - X = X[:, selection] - - else: - selection = None - return X, selection - - def _check_assertions( X, binary_operators, @@ -156,29 +145,6 @@ def _check_assertions( assert len(variable_names) == X.shape[1] -def run_feature_selection(X, y, select_k_features): - """Use a gradient boosting tree regressor as a proxy for finding - the k most important features in X, returning indices for those - features as output.""" - - from sklearn.ensemble import RandomForestRegressor - from sklearn.feature_selection import SelectFromModel - - clf = RandomForestRegressor(n_estimators=100, max_depth=3, random_state=0) - clf.fit(X, y) - selector = SelectFromModel( - clf, threshold=-np.inf, max_features=select_k_features, prefit=True - ) - return selector.get_support(indices=True) - - -def _escape_filename(filename): - """Turns a file into a string representation with correctly escaped backslashes""" - str_repr = str(filename) - str_repr = str_repr.replace("\\", "\\\\") - return str_repr - - def best(*args, **kwargs): # pragma: no cover raise NotImplementedError( "`best` has been deprecated. Please use the `PySRRegressor` interface. After fitting, you can return `.sympy()` to get the sympy representation of the best equation." @@ -203,20 +169,6 @@ def best_callable(*args, **kwargs): # pragma: no cover ) -def _denoise(X, y, Xresampled=None): - """Denoise the dataset using a Gaussian process""" - from sklearn.gaussian_process import GaussianProcessRegressor - from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel - - gp_kernel = RBF(np.ones(X.shape[1])) + WhiteKernel(1e-1) + ConstantKernel() - gpr = GaussianProcessRegressor(kernel=gp_kernel, n_restarts_optimizer=50) - gpr.fit(X, y) - if Xresampled is not None: - return Xresampled, gpr.predict(Xresampled) - - return X, gpr.predict(X) - - class CallableEquation: """Simple wrapper for numpy lambda functions built with sympy""" @@ -234,548 +186,608 @@ class CallableEquation: expected_shape = (X.shape[0],) if isinstance(X, pd.DataFrame): # Lambda function takes as argument: - return self._lambda(**{k: X[k].values for k in X.columns}) * np.ones( - expected_shape - ) - elif self._selection is not None: - return self._lambda(*X[:, self._selection].T) * np.ones(expected_shape) - return self._lambda(*X.T) * np.ones(expected_shape) + return self._lambda( + **{k: X[k].values for k in self._variable_names} + ) * np.ones(expected_shape) + else: + if self._selection is not None: + X = X[:, self._selection] + return self._lambda(*X.T) * np.ones(expected_shape) + + +class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin): + """ + Symbolic regression - scikit-learn interface for SymbolicRegression.jl. + + Parameters + ---------- + model_selection : str, default="best" + Model selection criterion. 
Can be 'accuracy' or 'best'.
+        `"accuracy"` selects the candidate model with the lowest loss
+        (highest accuracy). `"best"` selects the candidate model with
+        the lowest sum of normalized loss and complexity.
+
+    binary_operators : list[str], default=["+", "-", "*", "/"]
+        List of strings giving the binary operators in Julia's Base.
+
+    unary_operators : list[str], default=[]
+        Same as :param`binary_operators` but for operators taking a
+        single scalar.
+
+    niterations : int, default=40
+        Number of iterations of the algorithm to run. The best
+        equations are printed and migrate between populations at the
+        end of each iteration.
+
+    populations : int, default=15
+        Number of populations running.
+
+    population_size : int, default=33
+        Number of individuals in each population.
+
+    max_evals : int, default=None
+        Limits the total number of evaluations of expressions to
+        this number.
+
+    maxsize : int, default=20
+        Max size of an equation.
+
+    maxdepth : int, default=None
+        Max depth of an equation. You can use both :param`maxsize` and
+        :param`maxdepth`. :param`maxdepth` is by default set equal to
+        :param`maxsize`, in which case it has no additional effect.
+
+    warmup_maxsize_by : float, default=0.0
+        Whether to slowly increase max size from a small number up to
+        the maxsize (if greater than 0). If greater than 0, says the
+        fraction of training time at which the current maxsize will
+        reach the user-passed maxsize.
+
+    timeout_in_seconds : float, default=None
+        Make the search return early once this many seconds have passed.
+
+    constraints : dict[str, int | tuple[int,int]], default={}
+        Dictionary of int (unary) or 2-tuples (binary) that enforces
+        maxsize constraints on the individual arguments of operators.
+        E.g., `'pow': (-1, 1)` says that power laws can have any
+        complexity left argument, but only 1 complexity exponent. Use
+        this to force more interpretable solutions.
+
+    nested_constraints : dict[str, dict], default=None
+        Specifies how many times a combination of operators can be
+        nested. For example, `{"sin": {"cos": 0}, "cos": {"cos": 2}}`
+        specifies that `cos` may never appear within a `sin`, but `sin`
+        can be nested with itself an unlimited number of times. The
+        second term specifies that `cos` can be nested up to 2 times
+        within a `cos`, so that `cos(cos(cos(x)))` is allowed
+        (as well as any combination of `+` or `-` within it), but
+        `cos(cos(cos(cos(x))))` is not allowed. When an operator is not
+        specified, it is assumed that it can be nested an unlimited
+        number of times. This requires that there is no operator which
+        is used both in the unary operators and the binary operators
+        (e.g., `-` could be both subtract, and negation). For binary
+        operators, you only need to provide a single number: both
+        arguments are treated the same way, and the max of each
+        argument is constrained.
+
+    loss : str, default="L2DistLoss()"
+        String of Julia code specifying the loss function. Can either
+        be a loss from LossFunctions.jl, or your own loss written as a
+        function. Examples of custom written losses include:
+        `myloss(x, y) = abs(x-y)` for non-weighted, or
+        `myloss(x, y, w) = w*abs(x-y)` for weighted.
+
+        Among the included losses, these are as follows.
+        Regression: `LPDistLoss{P}()`, `L1DistLoss()`,
+        `L2DistLoss()` (mean square), `LogitDistLoss()`,
+        `HuberLoss(d)`, `L1EpsilonInsLoss(ϵ)`, `L2EpsilonInsLoss(ϵ)`,
+        `PeriodicLoss(c)`, `QuantileLoss(τ)`.
+ Classification: `ZeroOneLoss()`, `PerceptronLoss()`, + `L1HingeLoss()`, `SmoothedL1HingeLoss(γ)`, + `ModifiedHuberLoss()`, `L2MarginLoss()`, `ExpLoss()`, + `SigmoidLoss()`, `DWDMarginLoss(q)`. + + complexity_of_operators : dict[str, float], default=None + If you would like to use a complexity other than 1 for an + operator, specify the complexity here. For example, + `{"sin": 2, "+": 1}` would give a complexity of 2 for each use + of the `sin` operator, and a complexity of 1 for each use of + the `+` operator (which is the default). You may specify real + numbers for a complexity, and the total complexity of a tree + will be rounded to the nearest integer after computing. + + complexity_of_constants : float, default=1 + Complexity of constants. + + complexity_of_variables : float, default=1 + Complexity of variables. + + parsimony : float, default=0.0032 + Multiplicative factor for how much to punish complexity. + + use_frequency : bool, default=True + Whether to measure the frequency of complexities, and use that + instead of parsimony to explore equation space. Will naturally + find equations of all complexities. + + use_frequency_in_tournament : bool, default=True + Whether to use the frequency mentioned above in the tournament, + rather than just the simulated annealing. + + alpha : float, default=0.1 + Initial temperature for simulated annealing + (requires :param`annealing` to be `True`). + + annealing : bool, default=True + Whether to use annealing. You should (and it is default). + + early_stop_condition : float, default=None + Stop the search early if this loss is reached. + + ncyclesperiteration : int, default=550 + Number of total mutations to run, per 10 samples of the + population, per iteration. + + fraction_replaced : float, default=0.000364 + How much of population to replace with migrating equations from + other populations. + + fraction_replaced_hof : float, default=0.035 + How much of population to replace with migrating equations from + hall of fame. + + weight_add_node : float, default=0.79 + Relative likelihood for mutation to add a node. + + weight_insert_node : float, default=5.1 + Relative likelihood for mutation to insert a node. + + weight_delete_node : float, default=1.7 + Relative likelihood for mutation to delete a node. + + weight_do_nothing : float, default=0.21 + Relative likelihood for mutation to leave the individual. + + weight_mutate_constant : float, default=0.048 + Relative likelihood for mutation to change the constant slightly in a random direction. + + weight_mutate_operator : float, default=0.47 + Relative likelihood for mutation to swap an operator. + + weight_randomize : float, default=0.00023 + Relative likelihood for mutation to completely delete and then randomly generate the equation + + weight_simplify : float, default=0.0020 + Relative likelihood for mutation to simplify constant parts by evaluation + + crossover_probability : float, default=0.066 + Absolute probability of crossover-type genetic operation, instead of a mutation. + skip_mutation_failures : bool, default=True + Whether to skip mutation and crossover failures, rather than + simply re-sampling the current member. -def _get_julia_project(julia_project): - if julia_project is None: - is_shared = True - julia_project = f"pysr-{__version__}" - else: - is_shared = False - julia_project = Path(julia_project) - return julia_project, is_shared + migration : bool, default=True + Whether to migrate. + hof_migration : bool, default=True + Whether to have the hall of fame migrate. 
-def is_julia_version_greater_eq(Main, version="1.6"): - """Check if Julia version is greater than specified version.""" - return Main.eval(f'VERSION >= v"{version}"') + topn : int, default=12 + How many top individuals migrate from each population. + should_optimize_constants : bool, default=True + Whether to numerically optimize constants (Nelder-Mead/Newton) + at the end of each iteration. -def init_julia(): - """Initialize julia binary, turning off compiled modules if needed.""" - from julia.core import JuliaInfo, UnsupportedPythonError + optimizer_algorithm : str, default="BFGS" + Optimization scheme to use for optimizing constants. Can currently + be `NelderMead` or `BFGS`. - try: - info = JuliaInfo.load(julia="julia") - except FileNotFoundError: - env_path = os.environ["PATH"] - raise FileNotFoundError( - f"Julia is not installed in your PATH. Please install Julia and add it to your PATH.\n\nCurrent PATH: {env_path}", - ) + optimizer_nrestarts : int, default=2 + Number of time to restart the constants optimization process with + different initial conditions. - if not info.is_pycall_built(): - raise ImportError(import_error_string()) + optimize_probability : float, default=0.14 + Probability of optimizing the constants during a single iteration of + the evolutionary algorithm. - Main = None - try: - from julia import Main as _Main + optimizer_iterations : int, default=8 + Number of iterations that the constants optimizer can take. - Main = _Main - except UnsupportedPythonError: - # Static python binary, so we turn off pre-compiled modules. - from julia.core import Julia + perturbation_factor : float, default=0.076 + Constants are perturbed by a max factor of + (perturbation_factor*T + 1). Either multiplied by this or + divided by this. - jl = Julia(compiled_modules=False) - from julia import Main as _Main + tournament_selection_n : int, default=10 + Number of expressions to consider in each tournament. - Main = _Main + tournament_selection_p : float, default=0.86 + Probability of selecting the best expression in each + tournament. The probability will decay as p*(1-p)^n for other + expressions, sorted by loss. - return Main + procs : int, default=multiprocessing.cpu_count() + Number of processes (=number of populations running). + multithreading : bool, default=True + Use multithreading instead of distributed backend. + Using procs=0 will turn off both. -def _add_sr_to_julia_project(Main, io_arg): - Main.sr_spec = Main.PackageSpec( - name="SymbolicRegression", - url="https://github.com/MilesCranmer/SymbolicRegression.jl", - rev="v" + __symbolic_regression_jl_version__, - ) - Main.eval(f"Pkg.add(sr_spec, {io_arg})") - Main.clustermanagers_spec = Main.PackageSpec( - name="ClusterManagers", - url="https://github.com/JuliaParallel/ClusterManagers.jl", - rev="14e7302f068794099344d5d93f71979aaf4fbeb3", - ) - Main.eval(f"Pkg.add(clustermanagers_spec, {io_arg})") + cluster_manager : str, default=None + For distributed computing, this sets the job queue system. Set + to one of "slurm", "pbs", "lsf", "sge", "qrsh", "scyld", or + "htc". If set to one of these, PySR will run in distributed + mode, and use `procs` to figure out how many processes to launch. + + batching : bool, default=False + Whether to compare population members on small batches during + evolution. Still uses full dataset for comparing against hall + of fame. + + batch_size : int, default=50 + The amount of data to use if doing batching. + + fast_cycle : bool, default=False (experimental) + Batch over population subsamples. 
This is a slightly different
+        algorithm than regularized evolution, but does cycles 15%
+        faster. May be algorithmically less efficient.
+
+    precision : int, default=32
+        What precision to use for the data. By default this is 32
+        (float32), but you can select 64 or 16 as well.
+
+    verbosity : int, default=1e9
+        What verbosity level to use. 0 means minimal print statements.
+
+    update_verbosity : int, default=None
+        What verbosity level to use for package updates.
+        Will take value of :param`verbosity` if not given.
+
+    progress : bool, default=True
+        Whether to use a progress bar instead of printing to stdout.
+
+    equation_file : str, default=None
+        Where to save the files (.csv separated by |).
+
+    temp_equation_file : bool, default=False
+        Whether to put the hall of fame file in the temp directory.
+        Deletion is then controlled with the :param`delete_tempfiles`
+        parameter.
+
+    tempdir : str, default=None
+        Directory for the temporary files.
+
+    delete_tempfiles : bool, default=True
+        Whether to delete the temporary files after finishing.
+
+    julia_project : str, default=None
+        A Julia environment location containing a Project.toml
+        (and potentially the source code for SymbolicRegression.jl).
+        Default gives the Python package directory, where a
+        Project.toml file should be present from the install.
+
+    update : bool, default=True
+        Whether to automatically update Julia packages.
+
+    output_jax_format : bool, default=False
+        Whether to create a 'jax_format' column in the output,
+        containing jax-callable functions and the default parameters in
+        a jax array.
+
+    output_torch_format : bool, default=False
+        Whether to create a 'torch_format' column in the output,
+        containing a torch module with trainable parameters.
+
+    extra_sympy_mappings : dict[str, Callable], default={}
+        Provides mappings between custom :param`binary_operators` or
+        :param`unary_operators` defined in julia strings, to those same
+        operators defined in sympy.
+        E.g., if `unary_operators=["inv(x)=1/x"]`, then for the fitted
+        model to be exported to sympy, :param`extra_sympy_mappings`
+        would be `{"inv": lambda x: 1/x}`.
+
+    extra_jax_mappings : dict[Callable, str], default={}
+        Similar to :param`extra_sympy_mappings` but for model export
+        to jax. The dictionary maps sympy functions to jax functions.
+        For example: `extra_jax_mappings={sympy.sin: "jnp.sin"}` maps
+        the `sympy.sin` function to the equivalent jax expression `jnp.sin`.
+
+    extra_torch_mappings : dict[Callable, Callable], default={}
+        The same as :param`extra_jax_mappings` but for model export
+        to pytorch. Note that the dictionary keys should be callable
+        pytorch expressions.
+        For example: `extra_torch_mappings={sympy.sin: torch.sin}`.
+
+    denoise : bool, default=False
+        Whether to use a Gaussian Process to denoise the data before
+        inputting to PySR. Can help PySR fit noisy data.
+
+    select_k_features : int, default=None
+        Whether to run feature selection in Python using random forests,
+        before passing to the symbolic regression code. None means no
+        feature selection; an int means select that many features.
+
+    kwargs : dict, default=None
+        Supports deprecated keyword arguments. Other arguments will
+        result in an error.
+
+    Attributes
+    ----------
+    equations_ : pandas.DataFrame
+        DataFrame containing the results of model fitting.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+    nout_ : int
+        Number of output dimensions.
+
+    selection_mask_ : list[int] of length `select_k_features`
+        List of indices for input features that are selected when
+        :param`select_k_features` is set.
+
+    raw_julia_state_ : tuple[list[PyCall.jlwrap], PyCall.jlwrap]
+        The state for the julia SymbolicRegression.jl backend post fitting.
+
+    Notes
+    -----
+    Most default parameters have been tuned over several example equations,
+    but you should adjust `niterations`, `binary_operators`, `unary_operators`
+    to your requirements. You can view more detailed explanations of the options
+    on the [options page](https://astroautomata.com/PySR/#/options) of the
+    documentation.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from pysr import PySRRegressor
+    >>> randstate = np.random.RandomState(0)
+    >>> X = 2 * randstate.randn(100, 5)
+    >>> # y = 2.5382 * cos(x_3) + x_0^2 - 0.5
+    >>> y = 2.5382 * np.cos(X[:, 3]) + X[:, 0] ** 2 - 0.5
+    >>> model = PySRRegressor(
+    ...     niterations=40,
+    ...     binary_operators=["+", "*"],
+    ...     unary_operators=[
+    ...         "cos",
+    ...         "exp",
+    ...         "sin",
+    ...         "inv(x) = 1/x",  # Custom operator (julia syntax)
+    ...     ],
+    ...     model_selection="best",
+    ...     loss="loss(x, y) = (x - y)^2",  # Custom loss function (julia syntax)
+    ... )
+    >>> model.fit(X, y)
+    PySRRegressor.equations_ = [
+       pick      score                                           equation          loss  complexity
+    0         0.000000                                          3.8552167  3.360272e+01           1
+    1         1.189847                                          (x0 * x0)  3.110905e+00           3
+    2         0.010626                          ((x0 * x0) + -0.25573406)  3.045491e+00           5
+    3         0.896632                              (cos(x3) + (x0 * x0))  1.242382e+00           6
+    4         0.811362                ((x0 * x0) + (cos(x3) * 2.4384754))  2.451971e-01           8
+    5  >>>>  13.733371          (((cos(x3) * 2.5382) + (x0 * x0)) + -0.5)  2.889755e-13          10
+    6         0.194695  ((x0 * x0) + (((cos(x3) + -0.063180044) * 2.53...  1.957723e-13          12
+    7         0.006988  ((x0 * x0) + (((cos(x3) + -0.32505524) * 1.538...  1.944089e-13          13
+    8         0.000955  (((((x0 * x0) + cos(x3)) + -0.8251649) + (cos(...
1.940381e-13 15 + ] + >>> model.score(X, y) + 1.0 + >>> model.predict(np.array([1,2,3,4,5])) + array([-1.15907818, -1.15907818, -1.15907818, -1.15907818, -1.15907818]) + """ + + # Class validation constants + VALID_OPTIMIZER_ALGORITHMS = ["NelderMead", "BFGS"] -class PySRRegressor(BaseEstimator, RegressorMixin): def __init__( self, model_selection="best", *, - weights=None, - binary_operators=None, - unary_operators=None, - procs=cpu_count(), - loss="L2DistLoss()", - complexity_of_operators=None, - complexity_of_constants=None, - complexity_of_variables=None, - populations=15, + binary_operators=[ + "+", + "-", + "*", + "/", + ], + unary_operators=[], niterations=40, - ncyclesperiteration=550, + populations=15, + population_size=33, + max_evals=None, + maxsize=20, + maxdepth=None, + warmup_maxsize_by=0.0, timeout_in_seconds=None, + constraints={}, + nested_constraints=None, + loss="L2DistLoss()", + complexity_of_operators=None, + complexity_of_constants=1, + complexity_of_variables=1, + parsimony=0.0032, + use_frequency=True, + use_frequency_in_tournament=True, alpha=0.1, - annealing=False, + annealing=True, + early_stop_condition=None, + ncyclesperiteration=550, fraction_replaced=0.000364, fraction_replaced_hof=0.035, - population_size=33, - parsimony=0.0032, - migration=True, - hof_migration=True, - should_optimize_constants=True, - topn=12, weight_add_node=0.79, + weight_insert_node=5.1, weight_delete_node=1.7, weight_do_nothing=0.21, - weight_insert_node=5.1, weight_mutate_constant=0.048, weight_mutate_operator=0.47, weight_randomize=0.00023, weight_simplify=0.0020, crossover_probability=0.066, + skip_mutation_failures=True, + migration=True, + hof_migration=True, + topn=12, + should_optimize_constants=True, + optimizer_algorithm="BFGS", + optimizer_nrestarts=2, + optimize_probability=0.14, + optimizer_iterations=8, perturbation_factor=0.076, - extra_sympy_mappings=None, - extra_torch_mappings=None, - extra_jax_mappings=None, - equation_file=None, - verbosity=1e9, - update_verbosity=None, - progress=None, - maxsize=20, - fast_cycle=False, - maxdepth=None, - variable_names=None, + tournament_selection_n=10, + tournament_selection_p=0.86, + procs=cpu_count(), + multithreading=None, + cluster_manager=None, batching=False, batch_size=50, - select_k_features=None, - warmup_maxsize_by=0.0, - constraints=None, - nested_constraints=None, - use_frequency=True, - use_frequency_in_tournament=True, + fast_cycle=False, + precision=32, + verbosity=1e9, + update_verbosity=None, + progress=True, + equation_file=None, + temp_equation_file=False, tempdir=None, delete_tempfiles=True, julia_project=None, update=True, - temp_equation_file=False, output_jax_format=False, output_torch_format=False, - optimizer_algorithm="BFGS", - optimizer_nrestarts=2, - optimize_probability=0.14, - optimizer_iterations=8, - tournament_selection_n=10, - tournament_selection_p=0.86, + extra_sympy_mappings={}, + extra_torch_mappings={}, + extra_jax_mappings={}, denoise=False, - Xresampled=None, - precision=32, - multithreading=None, - cluster_manager=None, - skip_mutation_failures=True, - max_evals=None, - early_stop_condition=None, - # To support deprecated kwargs: + select_k_features=None, **kwargs, ): - """Initialize settings for an equation search in PySR. - - Note: most default parameters have been tuned over several example - equations, but you should adjust `niterations`, - `binary_operators`, `unary_operators` to your requirements. 
- You can view more detailed explanations of the options on the - [options page](https://astroautomata.com/PySR/#/options) of the documentation. - - :param model_selection: How to select a model. Can be 'accuracy' or 'best'. The default, 'best', will optimize a combination of complexity and accuracy. - :type model_selection: str - :param binary_operators: List of strings giving the binary operators in Julia's Base. Default is ["+", "-", "*", "/",]. - :type binary_operators: list - :param unary_operators: Same but for operators taking a single scalar. Default is []. - :type unary_operators: list - :param niterations: Number of iterations of the algorithm to run. The best equations are printed, and migrate between populations, at the end of each. - :type niterations: int - :param populations: Number of populations running. - :type populations: int - :param loss: String of Julia code specifying the loss function. Can either be a loss from LossFunctions.jl, or your own loss written as a function. Examples of custom written losses include: `myloss(x, y) = abs(x-y)` for non-weighted, or `myloss(x, y, w) = w*abs(x-y)` for weighted. Among the included losses, these are as follows. Regression: `LPDistLoss{P}()`, `L1DistLoss()`, `L2DistLoss()` (mean square), `LogitDistLoss()`, `HuberLoss(d)`, `L1EpsilonInsLoss(ϵ)`, `L2EpsilonInsLoss(ϵ)`, `PeriodicLoss(c)`, `QuantileLoss(τ)`. Classification: `ZeroOneLoss()`, `PerceptronLoss()`, `L1HingeLoss()`, `SmoothedL1HingeLoss(γ)`, `ModifiedHuberLoss()`, `L2MarginLoss()`, `ExpLoss()`, `SigmoidLoss()`, `DWDMarginLoss(q)`. - :type loss: str - :param complexity_of_operators: If you would like to use a complexity other than 1 for - an operator, specify the complexity here. For example, `{"sin": 2, "+": 1}` would give - a complexity of 2 for each use of the `sin` operator, and a complexity of 1 - for each use of the `+` operator (which is the default). You may specify - real numbers for a complexity, and the total complexity of a tree will be rounded - to the nearest integer after computing. - :type complexity_of_operators: dict - :param complexity_of_constants: Complexity of constants. Default is 1. - :type complexity_of_constants: int/float - :param complexity_of_variables: Complexity of variables. Default is 1. - :type complexity_of_variables: int/float - :param denoise: Whether to use a Gaussian Process to denoise the data before inputting to PySR. Can help PySR fit noisy data. - :type denoise: bool - :param select_k_features: whether to run feature selection in Python using random forests, before passing to the symbolic regression code. None means no feature selection; an int means select that many features. - :type select_k_features: None/int - :param procs: Number of processes (=number of populations running). - :type procs: int - :param multithreading: Use multithreading instead of distributed backend. Default is yes. Using procs=0 will turn off both. - :type multithreading: bool - :param cluster_manager: For distributed computing, this sets the job queue - system. Set to one of "slurm", "pbs", "lsf", "sge", "qrsh", "scyld", or "htc". - If set to one of these, PySR will run in distributed mode, and use `procs` to figure - out how many processes to launch. - :type cluster_manager: str - :param batching: whether to compare population members on small batches during evolution. Still uses full dataset for comparing against hall of fame. - :type batching: bool - :param batch_size: the amount of data to use if doing batching. 
- :type batch_size: int - :param maxsize: Max size of an equation. - :type maxsize: int - :param ncyclesperiteration: Number of total mutations to run, per 10 samples of the population, per iteration. - :type ncyclesperiteration: int - :param timeout_in_seconds: Make the search return early once this many seconds have passed. - :type timeout_in_seconds: float/int - :param alpha: Initial temperature. - :type alpha: float - :param annealing: Whether to use annealing. You should (and it is default). - :type annealing: bool - :param fraction_replaced: How much of population to replace with migrating equations from other populations. - :type fraction_replaced: float - :param fraction_replaced_hof: How much of population to replace with migrating equations from hall of fame. - :type fraction_replaced_hof: float - :param population_size: Number of individuals in each population - :type population_size: int - :param parsimony: Multiplicative factor for how much to punish complexity. - :type parsimony: float - :param migration: Whether to migrate. - :type migration: bool - :param hof_migration: Whether to have the hall of fame migrate. - :type hof_migration: bool - :param should_optimize_constants: Whether to numerically optimize constants (Nelder-Mead/Newton) at the end of each iteration. - :type should_optimize_constants: bool - :param topn: How many top individuals migrate from each population. - :type topn: int - :param perturbation_factor: Constants are perturbed by a max factor of (perturbation_factor*T + 1). Either multiplied by this or divided by this. - :type perturbation_factor: float - :param weight_add_node: Relative likelihood for mutation to add a node - :type weight_add_node: float - :param weight_insert_node: Relative likelihood for mutation to insert a node - :type weight_insert_node: float - :param weight_delete_node: Relative likelihood for mutation to delete a node - :type weight_delete_node: float - :param weight_do_nothing: Relative likelihood for mutation to leave the individual - :type weight_do_nothing: float - :param weight_mutate_constant: Relative likelihood for mutation to change the constant slightly in a random direction. - :type weight_mutate_constant: float - :param weight_mutate_operator: Relative likelihood for mutation to swap an operator. - :type weight_mutate_operator: float - :param weight_randomize: Relative likelihood for mutation to completely delete and then randomly generate the equation - :type weight_randomize: float - :param weight_simplify: Relative likelihood for mutation to simplify constant parts by evaluation - :type weight_simplify: float - :param crossover_probability: Absolute probability of crossover-type genetic operation, instead of a mutation. - :type crossover_probability: float - :param equation_file: Where to save the files (.csv separated by |) - :type equation_file: str - :param verbosity: What verbosity level to use. 0 means minimal print statements. - :type verbosity: int - :param update_verbosity: What verbosity level to use for package updates. Will take value of `verbosity` if not given. - :type update_verbosity: int - :param progress: Whether to use a progress bar instead of printing to stdout. - :type progress: bool - :param maxdepth: Max depth of an equation. You can use both maxsize and maxdepth. maxdepth is by default set to = maxsize, which means that it is redundant. - :type maxdepth: int - :param fast_cycle: (experimental) - batch over population subsamples. 
This is a slightly different algorithm than regularized evolution, but does cycles 15% faster. May be algorithmically less efficient. - :type fast_cycle: bool - :param variable_names: a list of names for the variables, other than "x0", "x1", etc. - :type variable_names: list - :param warmup_maxsize_by: whether to slowly increase max size from a small number up to the maxsize (if greater than 0). If greater than 0, says the fraction of training time at which the current maxsize will reach the user-passed maxsize. - :type warmup_maxsize_by: float - :param constraints: dictionary of int (unary) or 2-tuples (binary), this enforces maxsize constraints on the individual arguments of operators. E.g., `'pow': (-1, 1)` says that power laws can have any complexity left argument, but only 1 complexity exponent. Use this to force more interpretable solutions. - :type constraints: dict - :param nested_constraints: Specifies how many times a combination of operators can be nested. For example, - `{"sin": {"cos": 0}}, "cos": {"cos": 2}}` specifies that `cos` may never appear within a `sin`, - but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` - can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination - of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, - it is assumed that it can be nested an unlimited number of times. This requires that there is no operator - which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). - For binary operators, you only need to provide a single number: both arguments are treated the same way, - and the max of each argument is constrained. - :type nested_constraints: dict - :param use_frequency: whether to measure the frequency of complexities, and use that instead of parsimony to explore equation space. Will naturally find equations of all complexities. - :type use_frequency: bool - :param use_frequency_in_tournament: whether to use the frequency mentioned above in the tournament, rather than just the simulated annealing. - :type use_frequency_in_tournament: bool - :param tempdir: directory for the temporary files - :type tempdir: str/None - :param delete_tempfiles: whether to delete the temporary files after finishing - :type delete_tempfiles: bool - :param julia_project: a Julia environment location containing a Project.toml (and potentially the source code for SymbolicRegression.jl). Default gives the Python package directory, where a Project.toml file should be present from the install. - :type julia_project: str/None - :param update: Whether to automatically update Julia packages. - :type update: bool - :param temp_equation_file: Whether to put the hall of fame file in the temp directory. Deletion is then controlled with the delete_tempfiles argument. - :type temp_equation_file: bool - :param output_jax_format: Whether to create a 'jax_format' column in the output, containing jax-callable functions and the default parameters in a jax array. - :type output_jax_format: bool - :param output_torch_format: Whether to create a 'torch_format' column in the output, containing a torch module with trainable parameters. - :type output_torch_format: bool - :param tournament_selection_n: Number of expressions to consider in each tournament. 
- :type tournament_selection_n: int - :param tournament_selection_p: Probability of selecting the best expression in each tournament. The probability will decay as p*(1-p)^n for other expressions, sorted by loss. - :type tournament_selection_p: float - :param precision: What precision to use for the data. By default this is 32 (float32), but you can select 64 or 16 as well. - :type precision: int - :param skip_mutation_failures: Whether to skip mutation and crossover failures, rather than simply re-sampling the current member. - :type skip_mutation_failures: bool - :param max_evals: Limits the total number of evaluations of expressions to this number. - :type max_evals: int - :param early_stop_condition: Stop the search early if this loss is reached. - :type early_stop_condition: float - :param kwargs: Supports deprecated keyword arguments. Other arguments will result - in an error - :type kwargs: dict - :returns: Initialized model. Call `.fit(X, y)` to fit your data! - :type: PySRRegressor - """ - super().__init__() - # First, check for deprecated kwargs: + + # Hyperparameters + ## Model search parameters + self.model_selection = model_selection + self.binary_operators = binary_operators + self.unary_operators = unary_operators + self.niterations = niterations + self.populations = populations + ## Model search Constraints + self.population_size = population_size + self.max_evals = max_evals + self.maxsize = maxsize + self.maxdepth = maxdepth + self.warmup_maxsize_by = warmup_maxsize_by + self.timeout_in_seconds = timeout_in_seconds + self.constraints = constraints + self.nested_constraints = nested_constraints + ## Loss parameters + self.loss = loss + self.complexity_of_operators = complexity_of_operators + self.complexity_of_constants = complexity_of_constants + self.complexity_of_variables = complexity_of_variables + self.parsimony = float(parsimony) + self.use_frequency = use_frequency + self.use_frequency_in_tournament = use_frequency_in_tournament + self.alpha = alpha + self.annealing = annealing + self.early_stop_condition = early_stop_condition + ## Evolutionary search parameters + ### Mutation parameters + self.ncyclesperiteration = ncyclesperiteration + self.fraction_replaced = fraction_replaced + self.fraction_replaced_hof = fraction_replaced_hof + self.weight_add_node = weight_add_node + self.weight_insert_node = weight_insert_node + self.weight_delete_node = weight_delete_node + self.weight_do_nothing = weight_do_nothing + self.weight_mutate_constant = weight_mutate_constant + self.weight_mutate_operator = weight_mutate_operator + self.weight_randomize = weight_randomize + self.weight_simplify = weight_simplify + self.crossover_probability = crossover_probability + self.skip_mutation_failures = skip_mutation_failures + ### Migration parameters + self.migration = migration + self.hof_migration = hof_migration + self.topn = topn + ### Constants parameters + self.should_optimize_constants = should_optimize_constants + self.optimizer_algorithm = optimizer_algorithm + self.optimizer_nrestarts = optimizer_nrestarts + self.optimize_probability = optimize_probability + self.optimizer_iterations = optimizer_iterations + self.perturbation_factor = perturbation_factor + ### Selection parameters + self.tournament_selection_n = tournament_selection_n + self.tournament_selection_p = tournament_selection_p + # Solver parameters + self.procs = procs + self.multithreading = multithreading + self.cluster_manager = cluster_manager + self.batching = batching + self.batch_size = batch_size + 
self.fast_cycle = fast_cycle + self.precision = precision + # Additional runtime parameters + ## Runtime user interface + self.verbosity = verbosity + self.update_verbosity = update_verbosity + self.progress = progress + ## Project management + self.equation_file = equation_file + self.temp_equation_file = temp_equation_file + self.tempdir = tempdir + self.delete_tempfiles = delete_tempfiles + self.julia_project = julia_project + self.update = update + self.output_jax_format = output_jax_format + self.output_torch_format = output_torch_format + self.extra_sympy_mappings = extra_sympy_mappings + self.extra_jax_mappings = extra_jax_mappings + self.extra_torch_mappings = extra_torch_mappings + # Pre-modelling transformation + self.denoise = denoise + self.select_k_features = select_k_features + + # Once all valid parameters have been assigned handle the + # deprecated kwargs if len(kwargs) > 0: # pragma: no cover deprecated_kwargs = make_deprecated_kwargs_for_pysr_regressor() for k, v in kwargs.items(): - if k == "fractionReplaced": - fraction_replaced = v - elif k == "fractionReplacedHof": - fraction_replaced_hof = v - elif k == "npop": - population_size = v - elif k == "hofMigration": - hof_migration = v - elif k == "shouldOptimizeConstants": - should_optimize_constants = v - elif k == "weightAddNode": - weight_add_node = v - elif k == "weightDeleteNode": - weight_delete_node = v - elif k == "weightDoNothing": - weight_do_nothing = v - elif k == "weightInsertNode": - weight_insert_node = v - elif k == "weightMutateConstant": - weight_mutate_constant = v - elif k == "weightMutateOperator": - weight_mutate_operator = v - elif k == "weightRandomize": - weight_randomize = v - elif k == "weightSimplify": - weight_simplify = v - elif k == "crossoverProbability": - crossover_probability = v - elif k == "perturbationFactor": - perturbation_factor = v - elif k == "batchSize": - batch_size = v - elif k == "warmupMaxsizeBy": - warmup_maxsize_by = v - elif k == "useFrequency": - use_frequency = v - elif k == "useFrequencyInTournament": - use_frequency_in_tournament = v + # Handle renamed kwargs + if k in deprecated_kwargs: + updated_kwarg_name = deprecated_kwargs[k] + setattr(self, updated_kwarg_name, v) + warnings.warn( + f"{k} has been renamed to {updated_kwarg_name} in PySRRegressor. " + f" Please use that instead.", + FutureWarning, + ) + # Handle kwargs that have been moved to the fit method + elif k in ["weights", "variable_names", "Xresampled"]: + warnings.warn( + f"{k} is a data dependant parameter so should be passed when fit is called. " + f"Ignoring parameter; please pass {k} during the call to fit instead.", + FutureWarning, + ) else: raise TypeError( f"{k} is not a valid keyword argument for PySRRegressor" ) - updated_name = deprecated_kwargs[k] - warnings.warn( - f"{k} has been renamed to {updated_name} in PySRRegressor." - f" Please use that instead.", - ) - self.model_selection = model_selection - - if binary_operators is None: - binary_operators = "+ * - /".split(" ") - if unary_operators is None: - unary_operators = [] - if extra_sympy_mappings is None: - extra_sympy_mappings = {} - if variable_names is None: - variable_names = [] - if constraints is None: - constraints = {} - if multithreading is None: - # Default is multithreading=True, unless explicitly set, - # or procs is set to 0 (serial mode). 
- multithreading = procs != 0 and cluster_manager is None - if update_verbosity is None: - update_verbosity = verbosity - - buffer_available = "buffer" in sys.stdout.__dir__() - - if progress is not None: - if progress and not buffer_available: - warnings.warn( - "Note: it looks like you are running in Jupyter. The progress bar will be turned off." - ) - progress = False - else: - progress = buffer_available - - assert optimizer_algorithm in ["NelderMead", "BFGS"] - assert tournament_selection_n < population_size - - if extra_jax_mappings is not None: - for value in extra_jax_mappings.values(): - if not isinstance(value, str): - raise NotImplementedError( - "extra_jax_mappings must have keys that are strings! e.g., {sympy.sqrt: 'jnp.sqrt'}." - ) - else: - extra_jax_mappings = {} - - if extra_torch_mappings is not None: - for value in extra_jax_mappings.values(): - if not callable(value): - raise NotImplementedError( - "extra_torch_mappings must be callable functions! e.g., {sympy.sqrt: torch.sqrt}." - ) - else: - extra_torch_mappings = {} - - if maxsize > 40: - warnings.warn( - "Note: Using a large maxsize for the equation search will be exponentially slower and use significant memory." - ) - elif maxsize < 7: - raise NotImplementedError("PySR requires a maxsize of at least 7") - - if maxdepth is None: - maxdepth = maxsize - - if isinstance(binary_operators, str): - binary_operators = [binary_operators] - if isinstance(unary_operators, str): - unary_operators = [unary_operators] - - self.params = { - **dict( - weights=weights, - binary_operators=binary_operators, - unary_operators=unary_operators, - procs=procs, - loss=loss, - complexity_of_operators=complexity_of_operators, - complexity_of_constants=complexity_of_constants, - complexity_of_variables=complexity_of_variables, - populations=populations, - niterations=niterations, - ncyclesperiteration=ncyclesperiteration, - timeout_in_seconds=timeout_in_seconds, - alpha=alpha, - annealing=annealing, - fraction_replaced=fraction_replaced, - fraction_replaced_hof=fraction_replaced_hof, - population_size=population_size, - parsimony=float(parsimony), - migration=migration, - hof_migration=hof_migration, - should_optimize_constants=should_optimize_constants, - topn=topn, - weight_add_node=weight_add_node, - weight_insert_node=weight_insert_node, - weight_delete_node=weight_delete_node, - weight_do_nothing=weight_do_nothing, - weight_mutate_constant=weight_mutate_constant, - weight_mutate_operator=weight_mutate_operator, - weight_randomize=weight_randomize, - weight_simplify=weight_simplify, - crossover_probability=crossover_probability, - perturbation_factor=perturbation_factor, - verbosity=verbosity, - update_verbosity=update_verbosity, - progress=progress, - maxsize=maxsize, - fast_cycle=fast_cycle, - maxdepth=maxdepth, - batching=batching, - batch_size=batch_size, - select_k_features=select_k_features, - warmup_maxsize_by=warmup_maxsize_by, - constraints=constraints, - nested_constraints=nested_constraints, - use_frequency=use_frequency, - use_frequency_in_tournament=use_frequency_in_tournament, - tempdir=tempdir, - delete_tempfiles=delete_tempfiles, - update=update, - temp_equation_file=temp_equation_file, - optimizer_algorithm=optimizer_algorithm, - optimizer_nrestarts=optimizer_nrestarts, - optimize_probability=optimize_probability, - optimizer_iterations=optimizer_iterations, - tournament_selection_n=tournament_selection_n, - tournament_selection_p=tournament_selection_p, - denoise=denoise, - Xresampled=Xresampled, - precision=precision, 
- multithreading=multithreading, - cluster_manager=cluster_manager, - skip_mutation_failures=skip_mutation_failures, - max_evals=max_evals, - early_stop_condition=early_stop_condition, - ), - } - - # Stored equations: - self.equations = None - self.params_hash = None - self.raw_julia_state = None - - self.multioutput = None - self.equation_file = equation_file - self.n_features = None - self.extra_sympy_mappings = extra_sympy_mappings - self.extra_torch_mappings = extra_torch_mappings - self.extra_jax_mappings = extra_jax_mappings - self.output_jax_format = output_jax_format - self.output_torch_format = output_torch_format - self.nout = 1 - self.selection = None - self.variable_names = variable_names - self.julia_project = julia_project - - self.surface_parameters = [ - "model_selection", - "multioutput", - "equation_file", - "n_features", - "extra_sympy_mappings", - "extra_torch_mappings", - "extra_jax_mappings", - "output_jax_format", - "output_torch_format", - "nout", - "selection", - "variable_names", - "julia_project", - ] - def __repr__(self): """Prints all current equations fitted by the model. The string `>>>>` denotes which equation is selected by the `model_selection`. """ - if not hasattr(self, "equations") or self.equations is None: - return "PySRRegressor.equations = None" + if not hasattr(self, "equations_") or self.equations_ is None: + return "PySRRegressor.equations_ = None" - output = "PySRRegressor.equations = [\n" + output = "PySRRegressor.equations_ = [\n" - equations = self.equations + equations = self.equations_ if not isinstance(equations, list): all_equations = [equations] else: @@ -815,345 +827,311 @@ class PySRRegressor(BaseEstimator, RegressorMixin): output += "]" return output - def set_params(self, **params): - """Set parameters for equation search.""" - for key, value in params.items(): - if key in self.surface_parameters: - self.__setattr__(key, value) - elif key in self.params: - self.params[key] = value - else: - raise ValueError(f"Parameter {key} is not in the list of parameters.") - - return self - - def get_params(self, deep=True): - """Get parameters for equation search.""" - del deep - return { - **self.params, - **{key: self.__getattribute__(key) for key in self.surface_parameters}, - } - def get_best(self, index=None): - """Get best equation using `model_selection`. - - :param index: Optional. If you wish to select a particular equation - from `self.equations`, give the row number here. This overrides - the `model_selection` parameter. - :type index: int - :returns: Dictionary representing the best expression found. - :type: pd.Series """ - if self.equations is None: + Get best equation using `model_selection`. + + Parameters + ---------- + index : int, default=None + If you wish to select a particular equation from `self.equations_`, + give the row number here. This overrides the :param`model_selection` + parameter. + + Returns + ------- + best_equation : pandas.Series + Dictionary representing the best expression found. + + Raises + ------ + NotImplementedError + Raised when an invalid model selection strategy is provided. 
+        """
+        if self.equations_ is None:
             raise ValueError("No equations have been generated yet.")
 
         if index is not None:
-            if isinstance(self.equations, list):
+            if isinstance(self.equations_, list):
                 assert isinstance(index, list)
-                return [eq.iloc[i] for eq, i in zip(self.equations, index)]
-            return self.equations.iloc[index]
+                return [eq.iloc[i] for eq, i in zip(self.equations_, index)]
+            return self.equations_.iloc[index]
 
         if self.model_selection == "accuracy":
-            if isinstance(self.equations, list):
-                return [eq.iloc[-1] for eq in self.equations]
-            return self.equations.iloc[-1]
+            if isinstance(self.equations_, list):
+                return [eq.iloc[-1] for eq in self.equations_]
+            return self.equations_.iloc[-1]
         elif self.model_selection == "best":
-            if isinstance(self.equations, list):
-                return [eq.iloc[eq["score"].idxmax()] for eq in self.equations]
-            return self.equations.iloc[self.equations["score"].idxmax()]
+            if isinstance(self.equations_, list):
+                return [eq.iloc[eq["score"].idxmax()] for eq in self.equations_]
+            return self.equations_.iloc[self.equations_["score"].idxmax()]
         else:
             raise NotImplementedError(
                 f"{self.model_selection} is not a valid model selection strategy."
             )
 
-    def fit(self, X, y, weights=None, variable_names=None):
-        """Search for equations to fit the dataset and store them in `self.equations`.
-
-        :param X: 2D array. Rows are examples, columns are features. If pandas DataFrame, the columns are used for variable names (so make sure they don't contain spaces).
-        :type X: np.ndarray/pandas.DataFrame
-        :param y: 1D array (rows are examples) or 2D array (rows are examples, columns are outputs). Putting in a 2D array will trigger a search for equations for each feature of y.
-        :type y: np.ndarray
-        :param weights: Optional. Same shape as y. Each element is how to weight the mean-square-error loss for that particular element of y.
-        :type weights: np.ndarray
-        :param variable_names: a list of names for the variables, other than "x0", "x1", etc.
-        You can also pass a pandas DataFrame for X.
-        :type variable_names: list
+    def _validate_params(self, n_samples):
         """
-        if variable_names is None:
-            variable_names = self.variable_names
-
-        self._run(
-            X=X,
-            y=y,
-            weights=weights,
-            variable_names=variable_names,
-        )
-
-        return self
-
-    def refresh(self):
-        # Updates self.equations with any new options passed,
-        # such as extra_sympy_mappings.
-        self.equations = self.get_hof()
-
-    def predict(self, X, index=None):
-        """Predict y from input X using the equation chosen by `model_selection`.
-
-        You may see what equation is used by printing this object. X should have the same
-        columns as the training data.
+        Perform validation on the parameters defined in `__init__` for
+        the dataset specified in :term:`fit`.
+
+        Parameters
+        ----------
+        n_samples : int
+            Number of samples in the dataset to be fitted.
+
+        Returns
+        -------
+        self : object
+            Reference to `self` with validated parameters.
+
+        Raises
+        ------
+        ValueError
+            Raised when one of the following occurs: `tournament_selection_n`
+            is not smaller than `population_size`; `maxsize` is
+            less than 7; invalid `extra_jax_mappings` or
+            `extra_torch_mappings`; invalid optimizer algorithms.
 
-        :param X: 2D array. Rows are examples, columns are features. If pandas DataFrame, the columns are used for variable names (so make sure they don't contain spaces).
-        :type X: np.ndarray/pandas.DataFrame
-        :param index: Optional. If you want to compute the output of
-        an expression using a particular row of
-        `self.equations`, you may specify the index here.
- :type index: int - :returns: 1D array (rows are examples) or 2D array (rows are examples, columns are outputs). - :type: np.ndarray """ - self.refresh() - best = self.get_best(index=index) - try: - if self.multioutput: - return np.stack([eq["lambda_format"](X) for eq in best], axis=1) - return best["lambda_format"](X) - except Exception as error: - # Add extra information to the error, to say that the user - # should try to adjust extra_sympy_params. - raise ValueError( - "Failed to evaluate the expression. " - "If you are using a custom operator, make sure to define it in extra_sympy_mappings, " - "e.g., `model.set_params(extra_sympy_mappings={'inv': lambda x: 1 / x})`." - ) from error - - def sympy(self, index=None): - """Return sympy representation of the equation(s) chosen by `model_selection`. - - :param index: Optional. If you wish to select a particular equation - from `self.equations`, give the index number here. This overrides - the `model_selection` parameter. - :type index: int - :returns: SymPy representation of the best expression. - """ - self.refresh() - best = self.get_best(index=index) - if self.multioutput: - return [eq["sympy_format"] for eq in best] - return best["sympy_format"] - - def latex(self, index=None): - """Return latex representation of the equation(s) chosen by `model_selection`. - - :param index: Optional. If you wish to select a particular equation - from `self.equations`, give the index number here. This overrides - the `model_selection` parameter. - :type index: int - :returns: LaTeX expression as a string - :type: str - """ - self.refresh() - sympy_representation = self.sympy(index=index) - if self.multioutput: - return [sympy.latex(s) for s in sympy_representation] - return sympy.latex(sympy_representation) - - def jax(self, index=None): - """Return jax representation of the equation(s) chosen by `model_selection`. + # Handle None values for instance parameters: + if self.multithreading is None: + # Default is multithreading=True, unless explicitly set, + # or procs is set to 0 (serial mode). + self.multithreading = self.procs != 0 and self.cluster_manager is None + if self.update_verbosity is None: + self.update_verbosity = self.verbosity + if self.maxdepth is None: + self.maxdepth = self.maxsize + + # Cast tempdir string as a Path object + self.tempdir_ = Path(tempfile.mkdtemp(dir=self.tempdir)) + if self.temp_equation_file: + self.equation_file = self.tempdir_ / "hall_of_fame.csv" + elif self.equation_file is None: + date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3] + self.equation_file = "hall_of_fame_" + date_time + ".csv" - Each equation (multiple given if there are multiple outputs) is a dictionary - containing {"callable": func, "parameters": params}. To call `func`, pass - func(X, params). This function is differentiable using `jax.grad`. + # Handle type conversion for instance parameters: + if isinstance(self.binary_operators, str): + self.binary_operators = [self.binary_operators] + if isinstance(self.unary_operators, str): + self.unary_operators = [self.unary_operators] - :param index: Optional. If you wish to select a particular equation - from `self.equations`, give the index number here. This overrides - the `model_selection` parameter. - :type index: int - :returns: Dictionary of callable jax function in "callable" key, - and jax array of parameters as "parameters" key. 
- :type: dict - """ - if self.using_pandas: + # Warn if instance parameters are not sensible values: + if self.batch_size < 1: warnings.warn( - "PySR's JAX modules are not set up to work with a " - "model that was trained on pandas dataframes. " - "Train on an array instead to ensure everything works as planned." + f"Given :param`batch_size` must be greater than or equal to one." + f":param`batch_size` has been increased to equal one." ) - self.set_params(output_jax_format=True) - self.refresh() - best = self.get_best(index=index) - if self.multioutput: - return [eq["jax_format"] for eq in best] - return best["jax_format"] + self.batch_size = 1 - def pytorch(self, index=None): - """Return pytorch representation of the equation(s) chosen by `model_selection`. - - Each equation (multiple given if there are multiple outputs) is a PyTorch module - containing the parameters as trainable attributes. You can use the module like - any other PyTorch module: `module(X)`, where `X` is a tensor with the same - column ordering as trained with. + if n_samples > 10000 and not self.batching: + warnings.warn( + """ + Note: you are running with more than 10,000 datapoints. + You should consider turning on batching (https://astroautomata.com/PySR/#/options?id=batching). + You should also reconsider if you need that many datapoints. + Unless you have a large amount of noise (in which case you + should smooth your dataset first), generally < 10,000 datapoints + is enough to find a functional form with symbolic regression. + More datapoints will lower the search speed." + """, + ) + # Ensure instance parameters are allowable values: + # ValueError - Incompatible values + if not (self.tournament_selection_n < self.population_size): + raise ValueError( + f"tournament_selection_n parameter must be smaller than population_size" + ) - :param index: Optional. If you wish to select a particular equation - from `self.equations`, give the row number here. This overrides - the `model_selection` parameter. - :type index: int - :returns: PyTorch module representing the expression. - :type: torch.nn.Module - """ - if self.using_pandas: + if self.maxsize > 40: warnings.warn( - "PySR's PyTorch modules are not set up to work with a " - "model that was trained on pandas dataframes. " - "Train on an array instead to ensure everything works as planned." + "Note: Using a large maxsize for the equation search will be exponentially slower and use significant memory. You should consider turning `use_frequency` to False, and perhaps use `warmup_maxsize_by`." ) - self.set_params(output_torch_format=True) - self.refresh() - best = self.get_best(index=index) - if self.multioutput: - return [eq["torch_format"] for eq in best] - return best["torch_format"] + elif self.maxsize < 7: + raise ValueError(f"PySR requires a maxsize of at least 7") - def reset(self): - """Reset the search state.""" - self.equations = None - self.params_hash = None - self.raw_julia_state = None - self.variable_names = None - self.selection = None - - def _run(self, X, y, weights, variable_names): - global already_ran - global Main + if self.extra_jax_mappings is not None: + for value in self.extra_jax_mappings.values(): + if not isinstance(value, str): + raise ValueError( + "extra_jax_mappings must have keys that are strings! e.g., {sympy.sqrt: 'jnp.sqrt'}." 
-        for key in self.surface_parameters:
-            if key in self.params:
-                raise ValueError(
-                    f"{key} is a surface parameter, and cannot be in self.params"
-                )
+        if self.extra_torch_mappings is not None:
+            for value in self.extra_torch_mappings.values():
+                if not callable(value):
+                    raise ValueError(
+                        "extra_torch_mappings must be callable functions! e.g., {sympy.sqrt: torch.sqrt}."
+                    )
+        else:
+            self.extra_torch_mappings = {}

-        multithreading = self.params["multithreading"]
-        cluster_manager = self.params["cluster_manager"]
-        procs = self.params["procs"]
-        binary_operators = self.params["binary_operators"]
-        unary_operators = self.params["unary_operators"]
-        batching = self.params["batching"]
-        maxsize = self.params["maxsize"]
-        select_k_features = self.params["select_k_features"]
-        Xresampled = self.params["Xresampled"]
-        denoise = self.params["denoise"]
-        constraints = self.params["constraints"]
-        update = self.params["update"]
-        loss = self.params["loss"]
-        weight_mutate_constant = self.params["weight_mutate_constant"]
-        weight_mutate_operator = self.params["weight_mutate_operator"]
-        weight_add_node = self.params["weight_add_node"]
-        weight_insert_node = self.params["weight_insert_node"]
-        weight_delete_node = self.params["weight_delete_node"]
-        weight_simplify = self.params["weight_simplify"]
-        weight_randomize = self.params["weight_randomize"]
-        weight_do_nothing = self.params["weight_do_nothing"]
+        # NotImplementedError - Currently incompatible values that could be supported later
+        if self.optimizer_algorithm not in self.VALID_OPTIMIZER_ALGORITHMS:
+            raise NotImplementedError(
+                f"PySR currently only supports the following optimizer algorithms: {self.VALID_OPTIMIZER_ALGORITHMS}"
+            )

-        if Main is None:
-            if multithreading:
-                os.environ["JULIA_NUM_THREADS"] = str(procs)
+        # Handle presentation of the progress bar:
+        buffer_available = "buffer" in sys.stdout.__dir__()
+        if self.progress is not None:
+            if self.progress and not buffer_available:
+                warnings.warn(
+                    "Note: it looks like you are running in Jupyter. The progress bar will be turned off."
+                )
+                self.progress = False
+        else:
+            self.progress = buffer_available

-            Main = init_julia()
+        return self
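The `buffer_available` probe above is how a real terminal is distinguished from Jupyter; a standalone illustration, safe to run anywhere:

    import sys

    # A terminal's sys.stdout is a TextIOWrapper exposing a raw .buffer;
    # Jupyter swaps in a stream without one, which is what turns the
    # progress bar off above.
    print("buffer" in sys.stdout.__dir__())  # True in a terminal, False in Jupyter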
-        if cluster_manager is not None:
-            Main.eval(f"import ClusterManagers: addprocs_{cluster_manager}")
-            cluster_manager = Main.eval(f"addprocs_{cluster_manager}")
+    def _validate_fit_params(self, X, y, Xresampled, variable_names):
+        """
+        Validates the parameters passed to the :term`fit` method.

-        if isinstance(X, pd.DataFrame):
-            if variable_names is not None:
-                warnings.warn("Resetting variable_names from X.columns")
+        This method also sets the `nout_` attribute.

-            variable_names = list(X.columns)
-            X = np.array(X)
-            self.using_pandas = True
-        else:
-            self.using_pandas = False
+        Parameters
+        ----------
+        X : {ndarray | pandas.DataFrame} of shape (n_samples, n_features)
+            Training data.

-        if len(X.shape) == 1:
-            X = X[:, None]
+        y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
+            Target values. Will be cast to X's dtype if necessary.

-        if isinstance(y, pd.DataFrame) or isinstance(y, pd.Series):
-            y = np.array(y)
+        Xresampled : {ndarray | pandas.DataFrame} of shape (n_resampled, n_features), default=None
+            Resampled training data used for denoising.

-        if variable_names is None or len(variable_names) == 0:
-            variable_names = [f"x{i}" for i in range(X.shape[1])]
+        variable_names : list[str] of length n_features
+            Names of each variable in the training dataset, `X`.

-        use_custom_variable_names = len(variable_names) != 0
-        # TODO: this is always true.
+        Returns
+        -------
+        X_validated : ndarray of shape (n_samples, n_features)
+            Validated training data.

-        _check_assertions(
-            X,
-            binary_operators,
-            unary_operators,
-            use_custom_variable_names,
-            variable_names,
-            weights,
-            y,
-        )
+        y_validated : ndarray of shape (n_samples,) or (n_samples, n_targets)
+            Validated target data.

-        self.n_features = X.shape[1]
+        variable_names_validated : list[str] of length n_features
+            Validated list of variable names for each feature in `X`.

-        if len(X) > 10000 and not batching:
-            warnings.warn(
-                "Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://astroautomata.com/PySR/#/options?id=batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed."
-            )
+        """

-        if self.n_features >= 10 and not select_k_features:
+        if isinstance(X, pd.DataFrame):
+            variable_names = None
             warnings.warn(
-                "Note: you are running with 10 features or more. "
-                "Genetic algorithms like used in PySR scale poorly with large numbers of features. "
-                "Consider using feature selection techniques to select the most important features "
-                "(you can do this automatically with the `select_k_features` parameter), "
-                "or, alternatively, doing a dimensionality reduction beforehand. "
-                "For example, `X = PCA(n_components=6).fit_transform(X)`, "
-                "using scikit-learn's `PCA` class, "
-                "will reduce the number of features to 6 in an interpretable way, "
-                "as each resultant feature "
-                "will be a linear combination of the original features. "
+                ":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
+                "Will use DataFrame column names instead."
             )
-
-        X, selection = _handle_feature_selection(
-            X, select_k_features, y, variable_names
-        )
-
+
+            if X.columns.is_object() and X.columns.str.contains(" ").any():
+                X.columns = X.columns.str.replace(" ", "_")
+                warnings.warn(
+                    "Spaces in DataFrame column names are not supported. "
+                    "Spaces have been replaced with underscores. \n"
+                    "Please rename the columns to valid names."
+                )
+        elif variable_names:
+            if any(" " in name for name in variable_names):
+                variable_names = [name.replace(" ", "_") for name in variable_names]
+                warnings.warn(
+                    "Spaces in `variable_names` are not supported. "
+                    "Spaces have been replaced with underscores. \n"
+                    "Please use valid names instead."
+                )
+        # Only numpy values are needed from Xresampled; column metadata is
+        # provided by X
+        if isinstance(Xresampled, pd.DataFrame):
+            Xresampled = Xresampled.values
+
+        # Data validation and feature name fetching via sklearn
+        # This method sets the n_features_in_ attribute
+        X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
+        self.feature_names_in_ = _check_feature_names_in(self, variable_names)
+        variable_names = self.feature_names_in_
+
+        # Handle multioutput data
         if len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1):
-            self.multioutput = False
-            self.nout = 1
             y = y.reshape(-1)
         elif len(y.shape) == 2:
-            self.multioutput = True
-            self.nout = y.shape[1]
+            self.nout_ = y.shape[1]
         else:
             raise NotImplementedError("y shape not supported!")

-        if denoise:
-            if weights is not None:
-                raise NotImplementedError(
-                    "No weights for denoising - the weights are learned."
-                )
+        return X, y, variable_names
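A small sketch of the column-name sanitization performed above (hypothetical column names):

    import pandas as pd

    # Spaces in DataFrame column names become underscores before the
    # names are handed to the backend as variable names.
    X = pd.DataFrame({"feature one": [1.0, 2.0], "feature two": [3.0, 4.0]})
    X.columns = X.columns.str.replace(" ", "_")
    print(list(X.columns))  # ['feature_one', 'feature_two']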
+
+    def _pre_transform_training_data(self, X, y, Xresampled, variable_names):
+        """
+        Transforms the training data before fitting the symbolic regressor.
+
+        This method also updates/sets the `selection_mask_` attribute.
+
+        Parameters
+        ----------
+        X : {ndarray | pandas.DataFrame} of shape (n_samples, n_features)
+            Training data.
+
+        y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
+            Target values. Will be cast to X's dtype if necessary.
+
+        Xresampled : {ndarray | pandas.DataFrame} of shape (n_resampled, n_features), default=None
+            Resampled training data used for denoising.
+
+        variable_names : list[str] of length n_features
+            Names of each variable in the training dataset, `X`.
+
+        Returns
+        -------
+        X_transformed : ndarray of shape (n_samples, n_features)
+            Transformed training data. n_samples will be equal to
+            :param`Xresampled.shape[0]` if :param`self.denoise` is `True`,
+            and :param`Xresampled is not None`, otherwise it will be
+            equal to :param`X.shape[0]`. n_features will be equal to
+            :param`self.select_k_features` if `self.select_k_features is not None`,
+            otherwise it will be equal to :param`X.shape[1]`.
+
+        y_transformed : ndarray of shape (n_samples,) or (n_samples, n_outputs)
+            Transformed target data. n_samples will be equal to
+            :param`Xresampled.shape[0]` if :param`self.denoise` is `True`,
+            and :param`Xresampled is not None`, otherwise it will be
+            equal to :param`X.shape[0]`.
+
+        variable_names_transformed : list[str] of length n_features
+            Names of each variable in the transformed dataset,
+            `X_transformed`.
+        """
+        # Feature selection transformation
+        if self.select_k_features:
+            self.selection_mask_ = run_feature_selection(X, y, self.select_k_features)
+            X = X[:, self.selection_mask_]
+
             if Xresampled is not None:
-                # Select among only the selected features:
-                if isinstance(Xresampled, pd.DataFrame):
-                    # Handle Xresampled is pandas dataframe
-                    if selection is not None:
-                        Xresampled = Xresampled[[variable_names[i] for i in selection]]
-                    else:
-                        Xresampled = Xresampled[variable_names]
-                    Xresampled = np.array(Xresampled)
-                else:
-                    if selection is not None:
-                        Xresampled = Xresampled[:, selection]
-            if self.multioutput:
-                y = np.stack(
+                Xresampled = Xresampled[:, self.selection_mask_]
+
+            # Reduce variable_names to selection
+            variable_names = [variable_names[i] for i in self.selection_mask_]
+
+            # Re-perform data validation and feature name updating
+            X, y_transformed = self._validate_data(
+                X=X, y=y, reset=True, multi_output=True
+            )
+            # Update feature names with selected variable names
+            self.feature_names_in_ = _check_feature_names_in(self, variable_names)
+            print(f"Using features {list(self.feature_names_in_)}")
+
+        # Denoising transformation
+        if self.denoise:
+            if self.nout_ > 1:
+                y_transformed = np.stack(
                     [
                         _denoise(X, y[:, i], Xresampled=Xresampled)[1]
-                        for i in range(self.nout)
+                        for i in range(self.nout_)
                     ],
                     axis=1,
                 )
@@ -1162,31 +1140,55 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
             else:
                 X, y = _denoise(X, y, Xresampled=Xresampled)

-        self.julia_project, is_shared = _get_julia_project(self.julia_project)
+        return X, y, variable_names
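For intuition about the denoising branch, a hedged sketch using the module-level `_denoise` helper defined at the bottom of this file (synthetic data; the target function is made up):

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.uniform(-1, 1, (50, 2))
    y = X[:, 0] ** 2 + 0.1 * rng.randn(50)

    # _denoise fits a Gaussian process to (X, y) and returns the GP's
    # smoothed predictions in place of the noisy targets.
    X_clean, y_clean = _denoise(X, y)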
-        tmpdir = Path(tempfile.mkdtemp(dir=self.params["tempdir"]))
+    def _run(self, X, y, weights):
+        """
+        Run the symbolic regression fitting process on the julia backend.

-        if self.params["temp_equation_file"]:
-            self.equation_file = tmpdir / "hall_of_fame.csv"
-        elif self.equation_file is None:
-            date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
-            self.equation_file = "hall_of_fame_" + date_time + ".csv"
+        Parameters
+        ----------
+        X : {ndarray | pandas.DataFrame} of shape (n_samples, n_features)
+            Training data.

-        _create_inline_operators(
-            binary_operators=binary_operators, unary_operators=unary_operators
-        )
-        _handle_constraints(
-            binary_operators=binary_operators,
-            unary_operators=unary_operators,
-            constraints=constraints,
-        )
+        y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
+            Target values. Will be cast to X's dtype if necessary.
+
+        weights : {ndarray | pandas.DataFrame} of the same shape as y, default=None
+            Each element is how to weight the mean-square-error loss
+            for that particular element of y.
+
+        Returns
+        -------
+        self : object
+            Reference to `self` with fitted attributes.
+
+        Raises
+        ------
+        ImportError
+            Raised when the julia backend fails to import a package.
+        """
+
+        # Needs to be global, as we don't want to recreate/reinitialize Julia for every new instance of PySRRegressor
+        global already_ran
+        global Main

-        una_constraints = [constraints[op] for op in unary_operators]
-        bin_constraints = [constraints[op] for op in binary_operators]
+        # Start julia backend processes
+        if Main is None:
+            if self.multithreading:
+                os.environ["JULIA_NUM_THREADS"] = str(self.procs)
+
+            Main = init_julia()
+
+        if self.cluster_manager is not None:
+            Main.eval(f"import ClusterManagers: addprocs_{self.cluster_manager}")
+            self.cluster_manager = Main.eval(f"addprocs_{self.cluster_manager}")
+
+        self.julia_project, is_shared = _get_julia_project(self.julia_project)

         if not already_ran:
             Main.eval("using Pkg")
-            io = "devnull" if self.params["update_verbosity"] == 0 else "stderr"
+            io = "devnull" if self.update_verbosity == 0 else "stderr"
             io_arg = f"io={io}" if is_julia_version_greater_eq(Main, "1.6") else ""

             Main.eval(
@@ -1199,7 +1201,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
             _add_sr_to_julia_project(Main, io_arg)

         try:
-            if update:
+            if self.update:
                 Main.eval(f"Pkg.resolve({io_arg})")
             Main.eval(f"Pkg.instantiate({io_arg})")
         else:
@@ -1214,129 +1216,108 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
         Main.pow = Main.eval("(^)")
         Main.div = Main.eval("(/)")

-        nested_constraints = self.params["nested_constraints"]
+        _create_inline_operators(
+            binary_operators=self.binary_operators, unary_operators=self.unary_operators
+        )
+        _handle_constraints(
+            binary_operators=self.binary_operators,
+            unary_operators=self.unary_operators,
+            constraints=self.constraints,
+        )
+
+        una_constraints = [self.constraints[op] for op in self.unary_operators]
+        bin_constraints = [self.constraints[op] for op in self.binary_operators]
+
+        # Parse dict into Julia Dict for nested constraints:
-        if nested_constraints is not None:
+        if self.nested_constraints is not None:
             nested_constraints_str = "Dict("
-            for outer_k, outer_v in nested_constraints.items():
+            for outer_k, outer_v in self.nested_constraints.items():
                 nested_constraints_str += f"({outer_k}) => Dict("
                 for inner_k, inner_v in outer_v.items():
                     nested_constraints_str += f"({inner_k}) => {inner_v}, "
                 nested_constraints_str += "), "
             nested_constraints_str += ")"
-            nested_constraints = Main.eval(nested_constraints_str)
+            self.nested_constraints = Main.eval(nested_constraints_str)
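The string being assembled here is plain Julia source for a `Dict`; mirroring the loop above with the example constraint set from the class docstring:

    # For nested_constraints = {"sin": {"cos": 0}, "cos": {"cos": 2}}, the
    # loop below produces a Julia Dict literal, later passed to Main.eval.
    nested_constraints = {"sin": {"cos": 0}, "cos": {"cos": 2}}
    s = "Dict("
    for outer_k, outer_v in nested_constraints.items():
        s += f"({outer_k}) => Dict("
        for inner_k, inner_v in outer_v.items():
            s += f"({inner_k}) => {inner_v}, "
        s += "), "
    s += ")"
    print(s)  # Dict((sin) => Dict((cos) => 0, ), (cos) => Dict((cos) => 2, ), )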
self.params["complexity_of_operators"] - if complexity_of_operators is not None: + if self.complexity_of_operators is not None: complexity_of_operators_str = "Dict(" - for k, v in complexity_of_operators.items(): + for k, v in self.complexity_of_operators.items(): complexity_of_operators_str += f"({k}) => {v}, " complexity_of_operators_str += ")" - complexity_of_operators = Main.eval(complexity_of_operators_str) + self.complexity_of_operators = Main.eval(complexity_of_operators_str) - Main.custom_loss = Main.eval(loss) + Main.custom_loss = Main.eval(self.loss) mutationWeights = [ - float(weight_mutate_constant), - float(weight_mutate_operator), - float(weight_add_node), - float(weight_insert_node), - float(weight_delete_node), - float(weight_simplify), - float(weight_randomize), - float(weight_do_nothing), + float(self.weight_mutate_constant), + float(self.weight_mutate_operator), + float(self.weight_add_node), + float(self.weight_insert_node), + float(self.weight_delete_node), + float(self.weight_simplify), + float(self.weight_randomize), + float(self.weight_do_nothing), ] - params_to_hash = { - **{k: self.__getattribute__(k) for k in self.surface_parameters}, - **self.params, - } - params_excluded_from_hash = [ - "niterations", - ] - # Delete these^ from params_to_hash: - params_to_hash = { - k: v - for k, v in params_to_hash.items() - if k not in params_excluded_from_hash - } - - # Sort params_to_hash by key: - params_to_hash = OrderedDict(sorted(params_to_hash.items())) - # Hash all parameters: - cur_hash = sha256(str(params_to_hash).encode()).hexdigest() - - if self.params_hash is not None: - if cur_hash != self.params_hash: - warnings.warn( - "Warning: PySR options have changed since the last run. " - "This is experimental and may not work. " - "For example, if the operators change, or even their order," - " the saved equations will be in the wrong format." - "\n\n" - "To reset the search state, run `.reset()`. " - ) - - self.params_hash = cur_hash - # Call to Julia backend. 
# See https://github.com/search?q=%22function+Options%22+repo%3AMilesCranmer%2FSymbolicRegression.jl+path%3A%2Fsrc%2F+filename%3AOptions.jl+language%3AJulia&type=Code options = Main.Options( - binary_operators=Main.eval(str(tuple(binary_operators)).replace("'", "")), - unary_operators=Main.eval(str(tuple(unary_operators)).replace("'", "")), + binary_operators=Main.eval( + str(tuple(self.binary_operators)).replace("'", "") + ), + unary_operators=Main.eval( + str(tuple(self.unary_operators)).replace("'", "") + ), bin_constraints=bin_constraints, una_constraints=una_constraints, - complexity_of_operators=complexity_of_operators, - complexity_of_constants=self.params["complexity_of_constants"], - complexity_of_variables=self.params["complexity_of_variables"], - nested_constraints=nested_constraints, + complexity_of_operators=self.complexity_of_operators, + complexity_of_constants=self.complexity_of_constants, + complexity_of_variables=self.complexity_of_variables, + nested_constraints=self.nested_constraints, loss=Main.custom_loss, - maxsize=int(maxsize), + maxsize=int(self.maxsize), hofFile=_escape_filename(self.equation_file), - npopulations=int(self.params["populations"]), - batching=batching, - batchSize=int( - min([self.params["batch_size"], len(X)]) if batching else len(X) - ), + npopulations=int(self.populations), + batching=self.batching, + batchSize=int(min([self.batch_size, len(X)]) if self.batching else len(X)), mutationWeights=mutationWeights, - probPickFirst=self.params["tournament_selection_p"], - ns=self.params["tournament_selection_n"], + probPickFirst=self.tournament_selection_p, + ns=self.tournament_selection_n, # These have the same name: - parsimony=self.params["parsimony"], - alpha=self.params["alpha"], - maxdepth=self.params["maxdepth"], - fast_cycle=self.params["fast_cycle"], - migration=self.params["migration"], - hofMigration=self.params["hof_migration"], - fractionReplacedHof=self.params["fraction_replaced_hof"], - shouldOptimizeConstants=self.params["should_optimize_constants"], - warmupMaxsizeBy=self.params["warmup_maxsize_by"], - useFrequency=self.params["use_frequency"], - useFrequencyInTournament=self.params["use_frequency_in_tournament"], - npop=self.params["population_size"], - ncyclesperiteration=self.params["ncyclesperiteration"], - fractionReplaced=self.params["fraction_replaced"], - topn=self.params["topn"], - verbosity=self.params["verbosity"], - optimizer_algorithm=self.params["optimizer_algorithm"], - optimizer_nrestarts=self.params["optimizer_nrestarts"], - optimize_probability=self.params["optimize_probability"], - optimizer_iterations=self.params["optimizer_iterations"], - perturbationFactor=self.params["perturbation_factor"], - annealing=self.params["annealing"], + parsimony=self.parsimony, + alpha=self.alpha, + maxdepth=self.maxdepth, + fast_cycle=self.fast_cycle, + migration=self.migration, + hofMigration=self.hof_migration, + fractionReplacedHof=self.fraction_replaced_hof, + shouldOptimizeConstants=self.should_optimize_constants, + warmupMaxsizeBy=self.warmup_maxsize_by, + useFrequency=self.use_frequency, + useFrequencyInTournament=self.use_frequency_in_tournament, + npop=self.population_size, + ncyclesperiteration=self.ncyclesperiteration, + fractionReplaced=self.fraction_replaced, + topn=self.topn, + verbosity=self.verbosity, + optimizer_algorithm=self.optimizer_algorithm, + optimizer_nrestarts=self.optimizer_nrestarts, + optimize_probability=self.optimize_probability, + optimizer_iterations=self.optimizer_iterations, + 
            perturbationFactor=self.perturbation_factor,
+            annealing=self.annealing,
             stateReturn=True,  # Required for state saving.
-            progress=self.params["progress"],
-            timeout_in_seconds=self.params["timeout_in_seconds"],
-            crossoverProbability=self.params["crossover_probability"],
-            skip_mutation_failures=self.params["skip_mutation_failures"],
-            max_evals=self.params["max_evals"],
-            earlyStopCondition=self.params["early_stop_condition"],
+            progress=self.progress,
+            timeout_in_seconds=self.timeout_in_seconds,
+            crossoverProbability=self.crossover_probability,
+            skip_mutation_failures=self.skip_mutation_failures,
+            max_evals=self.max_evals,
+            earlyStopCondition=self.early_stop_condition,
         )

-        np_dtype = {16: np.float16, 32: np.float32, 64: np.float64}[
-            self.params["precision"]
-        ]
+        # Convert data to desired precision
+        np_dtype = {16: np.float16, 32: np.float32, 64: np.float64}[self.precision]

         Main.X = np.array(X, dtype=np_dtype).T
         if len(y.shape) == 1:
@@ -1351,48 +1332,312 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
         else:
             Main.weights = None

-        cprocs = 0 if multithreading else procs
+        cprocs = 0 if self.multithreading else self.procs

         # Call to Julia backend.
         # See https://github.com/search?q=%22function+EquationSearch%22+repo%3AMilesCranmer%2FSymbolicRegression.jl+path%3A%2Fsrc%2F+filename%3ASymbolicRegression.jl+language%3AJulia&type=Code
-        self.raw_julia_state = Main.EquationSearch(
+        self.raw_julia_state_ = Main.EquationSearch(
             Main.X,
             Main.y,
             weights=Main.weights,
-            niterations=int(self.params["niterations"]),
-            varMap=(
-                variable_names
-                if selection is None
-                else [variable_names[i] for i in selection]
-            ),
+            niterations=int(self.niterations),
+            varMap=self.feature_names_in_.tolist(),
             options=options,
             numprocs=int(cprocs),
-            multithreading=bool(multithreading),
-            saved_state=self.raw_julia_state,
-            addprocs_function=cluster_manager,
+            multithreading=bool(self.multithreading),
+            saved_state=self.raw_julia_state_,
+            addprocs_function=self.cluster_manager,
         )

-        self.variable_names = variable_names
-        self.selection = selection
+        # Set attributes
+        self.equations_ = self.get_hof()

-        # Not in params:
-        # selection, variable_names, multioutput
+        if self.delete_tempfiles:
+            shutil.rmtree(self.tempdir_)

-        self.equations = self.get_hof()
+        already_ran = True

-        if self.params["delete_tempfiles"]:
-            shutil.rmtree(tmpdir)
+        return self

-        already_ran = True
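Stepping back from the internals, a hedged sketch of the user-facing flow this method implements (assumes PySR and its Julia dependencies are installed; the target function is made up for illustration):

    import numpy as np
    from pysr import PySRRegressor

    X = 2 * np.random.randn(100, 5)
    y = 2.5382 * np.cos(X[:, 3]) + X[:, 0] ** 2 - 0.5

    model = PySRRegressor(
        niterations=5,
        binary_operators=["+", "*"],
        unary_operators=["cos"],
    )
    model.fit(X, y)       # drives EquationSearch on the Julia backend
    print(model.sympy())  # best equation per `model_selection`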
+    def fit(
+        self,
+        X,
+        y,
+        Xresampled=None,
+        weights=None,
+        variable_names=None,
+        from_equation_file=False,
+    ):
+        """
+        Search for equations to fit the dataset and store them in `self.equations_`.
+
+        Parameters
+        ----------
+        X : {ndarray | pandas.DataFrame} of shape (n_samples, n_features)
+            Training data.
+
+        y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
+            Target values. Will be cast to X's dtype if necessary.
+
+        Xresampled : {ndarray | pandas.DataFrame} of shape (n_resampled, n_features), default=None
+            Resampled training data used for denoising.
+
+        weights : {ndarray | pandas.DataFrame} of the same shape as y, default=None
+            Each element is how to weight the mean-square-error loss
+            for that particular element of y.
+
+        variable_names : list[str], default=None
+            A list of names for the variables, other than "x0", "x1", etc.
+            If :param`X` is a pandas dataframe, the column names will be used.
+            If :param`variable_names` is also specified, it will be reset
+            with a warning and the column names used instead.
+
+        from_equation_file : bool, default=False
+            Allows model to be initialized/fit from a previous run that has
+            been saved to a file. If true, a value of y still needs to be
+            passed such that `nout_` can be determined, however, the values of
+            y are irrelevant and can be all zeros.
+
+        Returns
+        -------
+        self : object
+            Fitted Estimator.
+        """
+
+        # Init attributes that are not specified in BaseEstimator
+        self.equations_ = None
+        self.nout_ = 1
+        self.selection_mask_ = None
+        self.raw_julia_state_ = None
+
+        # Parameter input validation (for parameters defined in __init__)
+        self._validate_params(n_samples=X.shape[0])
+        X, y, variable_names = self._validate_fit_params(
+            X, y, Xresampled, variable_names
+        )
+
+        # Pre transformations (feature selection and denoising)
+        X, y, variable_names = self._pre_transform_training_data(
+            X, y, Xresampled, variable_names
+        )
+
+        # Warn about large feature counts (still warn if feature count is large after running feature selection)
+        if self.n_features_in_ >= 10:
+            warnings.warn(
+                "Note: you are running with 10 features or more. "
+                "Genetic algorithms like those used in PySR scale poorly with large numbers of features. "
+                "Consider using feature selection techniques to select the most important features "
+                "(you can do this automatically with the `select_k_features` parameter), "
+                "or, alternatively, doing a dimensionality reduction beforehand. "
+                "For example, `X = PCA(n_components=6).fit_transform(X)`, "
+                "using scikit-learn's `PCA` class, "
+                "will reduce the number of features to 6 in an interpretable way, "
+                "as each resultant feature "
+                "will be a linear combination of the original features. "
+            )
+
+        # Assertion checks
+        use_custom_variable_names = variable_names is not None
+        # TODO: this is always true.
+
+        _check_assertions(
+            X,
+            self.binary_operators,
+            self.unary_operators,
+            use_custom_variable_names,
+            variable_names,
+            weights,
+            y,
+        )
+
+        # Fitting procedure
+        if not from_equation_file:
+            self._run(X=X, y=y, weights=weights)
+        else:
+            self.equations_ = self.get_hof()
+
+        return self
+
+    def refresh(self):
+        """
+        Updates self.equations_ with any new options passed, such as
+        :param`extra_sympy_mappings`.
+        """
+        self.equations_ = self.get_hof()
+
+    def _decision_function(self, X, best_equation):
+        """
+        Decide what value to predict based on the 'best' equation found
+        from fitting.
+
+        Parameters
+        ----------
+        X : {ndarray | pandas.DataFrame} of shape (n_samples, n_features)
+            Testing data for evaluating the model.
+
+        best_equation : pd.Series
+            Selected best equation from `self.equations_`.
+
+        Returns
+        -------
+        y_predicted : ndarray of shape (n_samples,) or (n_samples, nout_)
+            Values predicted by substituting `X` into the
+            :param`best_equation`.
+
+        Raises
+        ------
+        ValueError
+            Raised if the `best_equation` cannot be evaluated.
+        """
+        check_is_fitted(self)
+
+        if isinstance(X, pd.DataFrame):
+            X = X[self.feature_names_in_]
+        elif self.selection_mask_ is not None:
+            X = X[:, self.selection_mask_]
+
+        X = self._validate_data(X, reset=False)
+        try:
+            if self.nout_ > 1:
+                return np.stack(
+                    [eq["lambda_format"](X) for eq in best_equation], axis=1
+                )
+            return best_equation["lambda_format"](X)
+        except Exception as error:
+            raise ValueError(
+                "Failed to evaluate the expression. "
+                "If you are using a custom operator, make sure to define it in :param`extra_sympy_mappings`, "
+                "e.g., `model.set_params(extra_sympy_mappings={'inv': lambda x: 1 / x})`."
+            ) from error
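One consequence of the DataFrame branch above: columns are realigned by name before evaluation, so their order at predict time does not matter. A sketch with hypothetical feature names:

    import pandas as pd

    feature_names_in_ = ["x0", "x1"]
    X = pd.DataFrame({"x1": [1.0], "x0": [2.0]})  # order differs from training
    X = X[feature_names_in_]                      # realigned to ['x0', 'x1']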
+    def predict(self, X, index=None):
+        """Predict y from input X using the equation chosen by `model_selection`.
+
+        You may see what equation is used by printing this object.
+        X should have the same columns as the training data.
+
+        Parameters
+        ----------
+        X : {ndarray | pandas.DataFrame} of shape (n_samples, n_features)
+            Testing data.
+
+        index : int, default=None
+            If you want to compute the output of an expression using a
+            particular row of `self.equations_`, you may specify the index here.
+
+        Returns
+        -------
+        y_predicted : ndarray of shape (n_samples, nout_)
+            Values predicted by substituting `X` into the fitted symbolic
+            regression model.
+        """
+        self.refresh()
+        best_equation = self.get_best(index=index)
+        return self._decision_function(X, best_equation)
+
+    def sympy(self, index=None):
+        """Return sympy representation of the equation(s) chosen by `model_selection`.
+
+        Parameters
+        ----------
+        index : int, default=None
+            If you wish to select a particular equation from
+            `self.equations_`, give the index number here. This overrides
+            the `model_selection` parameter.
+
+        Returns
+        -------
+        best_equation : str, list[str] of length nout_
+            SymPy representation of the best equation.
+        """
+        self.refresh()
+        best = self.get_best(index=index)
+        if self.nout_ > 1:
+            return [eq["sympy_format"] for eq in best]
+        return best["sympy_format"]
+
+    def latex(self, index=None):
+        """Return latex representation of the equation(s) chosen by `model_selection`.
+
+        Parameters
+        ----------
+        index : int, default=None
+            If you wish to select a particular equation from
+            `self.equations_`, give the index number here. This overrides
+            the `model_selection` parameter.
+
+        Returns
+        -------
+        best_equation : str or list[str] of length nout_
+            LaTeX expression of the best equation.
+        """
+        self.refresh()
+        sympy_representation = self.sympy(index=index)
+        if self.nout_ > 1:
+            return [sympy.latex(s) for s in sympy_representation]
+        return sympy.latex(sympy_representation)
+
+    def jax(self, index=None):
+        """Return jax representation of the equation(s) chosen by `model_selection`.
+
+        Each equation (multiple given if there are multiple outputs) is a dictionary
+        containing {"callable": func, "parameters": params}. To call `func`, pass
+        func(X, params). This function is differentiable using `jax.grad`.
+
+        Parameters
+        ----------
+        index : int, default=None
+            If you wish to select a particular equation from
+            `self.equations_`, give the row number here. This overrides
+            the `model_selection` parameter.
+
+        Returns
+        -------
+        best_equation : dict[str, Any]
+            Dictionary of callable jax function in "callable" key,
+            and jax array of parameters as "parameters" key.
+        """
+
+        self.set_params(output_jax_format=True)
+        self.refresh()
+        best = self.get_best(index=index)
+        if self.nout_ > 1:
+            return [eq["jax_format"] for eq in best]
+        return best["jax_format"]
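Continuing the fit sketch near `fit` above, a hedged example of consuming the JAX export for a single-output model (assumes `jax` is installed and `model`, `X`, `y` are as in that sketch):

    import jax

    eq = model.jax()  # {"callable": func, "parameters": params}
    f, params = eq["callable"], eq["parameters"]
    y_pred = f(X, params)
    # Differentiable in the constants, e.g. for fine-tuning them:
    grads = jax.grad(lambda p: ((f(X, p) - y) ** 2).mean())(params)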
+ """ + self.set_params(output_torch_format=True) + self.refresh() + best = self.get_best(index=index) + if self.nout_ > 1: + return [eq["torch_format"] for eq in best] + return best["torch_format"] def get_hof(self): """Get the equations from a hall of fame file. If no arguments entered, the ones used previously from a call to PySR will be used.""" - try: - if self.multioutput: + if self.nout_ > 1: all_outputs = [] - for i in range(1, self.nout + 1): + for i in range(1, self.nout_ + 1): df = pd.read_csv( str(self.equation_file) + f".out{i}" + ".bkup", sep="|", @@ -1436,20 +1681,15 @@ class PySRRegressor(BaseEstimator, RegressorMixin): jax_format = [] if self.output_torch_format: torch_format = [] - use_custom_variable_names = len(self.variable_names) != 0 local_sympy_mappings = { **self.extra_sympy_mappings, **sympy_mappings, } - if use_custom_variable_names: - sympy_symbols = [ - sympy.Symbol(self.variable_names[i]) for i in range(self.n_features) - ] - else: - sympy_symbols = [ - sympy.Symbol("x%d" % i) for i in range(self.n_features) - ] + sympy_symbols = [ + sympy.Symbol(self.feature_names_in_[i]) + for i in range(self.n_features_in_) + ] for _, eqn_row in output.iterrows(): eqn = sympify(eqn_row["equation"], locals=local_sympy_mappings) @@ -1458,7 +1698,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin): # Numpy: lambda_format.append( CallableEquation( - sympy_symbols, eqn, self.selection, self.variable_names + sympy_symbols, eqn, self.selection_mask_, self.feature_names_in_ ) ) @@ -1469,7 +1709,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin): func, params = sympy2jax( eqn, sympy_symbols, - selection=self.selection, extra_jax_mappings=self.extra_jax_mappings, ) jax_format.append({"callable": func, "parameters": params}) @@ -1481,7 +1720,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin): module = sympy2torch( eqn, sympy_symbols, - selection=self.selection, extra_torch_mappings=self.extra_torch_mappings, ) torch_format.append(module) @@ -1523,11 +1761,48 @@ class PySRRegressor(BaseEstimator, RegressorMixin): ret_outputs.append(output[output_cols]) - if self.multioutput: + if self.nout_ > 1: return ret_outputs return ret_outputs[0] - def score(self, X, y): - del X - del y - raise NotImplementedError + +def _denoise(X, y, Xresampled=None): + """Denoise the dataset using a Gaussian process""" + from sklearn.gaussian_process import GaussianProcessRegressor + from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel + + gp_kernel = RBF(np.ones(X.shape[1])) + WhiteKernel(1e-1) + ConstantKernel() + gpr = GaussianProcessRegressor(kernel=gp_kernel, n_restarts_optimizer=50) + gpr.fit(X, y) + if Xresampled is not None: + return Xresampled, gpr.predict(Xresampled) + + return X, gpr.predict(X) + + +# Function hasnot been removed only due to usage in module tests +def _handle_feature_selection(X, select_k_features, y, variable_names): + if select_k_features is not None: + selection = run_feature_selection(X, y, select_k_features) + print(f"Using features {[variable_names[i] for i in selection]}") + X = X[:, selection] + + else: + selection = None + return X, selection + + +def run_feature_selection(X, y, select_k_features): + """Use a gradient boosting tree regressor as a proxy for finding + the k most important features in X, returning indices for those + features as output.""" + + from sklearn.ensemble import RandomForestRegressor + from sklearn.feature_selection import SelectFromModel + + clf = RandomForestRegressor(n_estimators=100, max_depth=3, 
+
+
+def run_feature_selection(X, y, select_k_features):
+    """Use a random forest regressor as a proxy for finding
+    the k most important features in X, returning indices for those
+    features as output."""

+    from sklearn.ensemble import RandomForestRegressor
+    from sklearn.feature_selection import SelectFromModel
+
+    clf = RandomForestRegressor(n_estimators=100, max_depth=3, random_state=0)
+    clf.fit(X, y)
+    selector = SelectFromModel(
+        clf, threshold=-np.inf, max_features=select_k_features, prefit=True
+    )
+    return selector.get_support(indices=True)
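A usage sketch of the selector on synthetic data (exact indices depend on the data, but here the informative features should win):

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.randn(500, 5)
    y = 3 * X[:, 1] - X[:, 4]

    # Returns the indices of the k features the random forest deems
    # most important; expected to be [1, 4] for this target.
    print(run_feature_selection(X, y, select_k_features=2))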