Spaces:

MilesCranmer
/

PySR

Sleeping

File size: 44,049 Bytes

2f296b6
976f8d8
 
2f296b6
2f38c9c
976f8d8
 
 
bed9614
976f8d8
 
7ed402e
2f296b6
8f60615
b2d7f41
c822df8
8f60615
c822df8
 
618a3f8
 
 
 
 
 
bed9614
7d4300a
2f38c9c
 
9a7c989
 
14e9a4b
4b56660
26a3c7f
5ada6c7
 
aaf3c83
14e9a4b
ed35c4e
 
7d4300a
2f38c9c
 
4c9fe98
 
 
 
af14165
c7187a6
af14165
10ff16a
44aefe9
 
 
 
 
 
 
 
 
00875eb
 
 
 
 
 
 
 
 
 
 
c86910d
 
6146f6b
c86910d
4c9fe98
 
5784d04
 
4c9fe98
 
98ccd5e
c86910d
d16abb4
c86910d
 
 
 
 
 
 
4c9fe98
af14165
c7187a6
c86910d
 
 
6146f6b
7049740
 
 
673e8d5
3fcdd9a
 
673e8d5
3fcdd9a
 
 
 
 
 
 
673e8d5
1d19a08
af8d4da
 
 
1f3aace
d16abb4
af8d4da
0445487
1f3aace
af8d4da
 
81cfb5c
af8d4da
1a177ee
 
af8d4da
7049740
 
 
fd42a40
7d4300a
af14165
1b17efe
 
fd42a40
af14165
e0e2933
7d4300a
 
6f3a6fd
 
bfb135a
 
 
4c9fe98
7d4300a
af14165
c7187a6
2f38c9c
c7187a6
af14165
 
ddb4d52
d9913e3
 
 
 
 
 
 
 
 
7a792a8
 
 
 
d85c1a5
ae0b11e
 
ed35c4e
6a4fa2c
 
 
 
5af6354
6a4fa2c
 
af14165
7d4300a
 
932dcf5
7d4300a
 
d85c1a5
4c9fe98
7d4300a
ae0b11e
6a4fa2c
b293893
 
 
f5577ea
b293893
 
 
 
 
50c7eff
b293893
 
f5577ea
b293893
 
 
 
 
6a4fa2c
90d3ef7
 
 
 
 
 
 
 
 
6e07d35
90d3ef7
 
 
8fe6c62
90d3ef7
775c667
ed35c4e
a232b56
58834e8
a232b56
 
 
4c9fe98
a232b56
0020398
58834e8
0020398
 
7d4300a
c7187a6
faa83d3
8cfda07
aa16a1e
03d5a42
 
87880d1
03d5a42
 
 
81cfb5c
0b521f3
7c2bce0
 
 
 
 
03d5a42
aa16a1e
 
c7187a6
aa16a1e
 
0fba777
 
 
 
 
 
21d6b92
 
 
 
 
 
5750d1a
ed35c4e
af14165
ffd9cd1
 
 
932dcf5
27fac96
5750d1a
 
4c9fe98
5750d1a
27fac96
 
 
aaf3c83
 
af14165
 
 
5750d1a
50f37a0
ffd9cd1
 
ed35c4e
 
 
ffd9cd1
 
 
 
ed35c4e
ffd9cd1
ad8332d
 
ffd9cd1
 
 
ed35c4e
 
 
ffd9cd1
 
af14165
ffd9cd1
 
561e614
ffd9cd1
b13cd4f
4c9fe98
ffd9cd1
c7187a6
af14165
 
 
45d2b5f
1662e82
ffd9cd1
 
ed35c4e
 
 
ffd9cd1
 
45d2b5f
 
ffd9cd1
a190947
 
 
 
 
 
 
 
224f906
a190947
 
 
 
f266b70
a190947
f266b70
 
 
a190947
ccf71e9
 
 
593c674
 
 
 
ccf71e9
 
 
58e25a9
 
 
 
 
34f4e3f
58e25a9
 
 
 
 
 
 
 
 
 
 
ccf71e9
78cdb0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34f4e3f
78cdb0e
 
 
 
 
b53e7fa
 
 
 
 
b8a97f1
34f4e3f
b53e7fa
 
 
 
1adfa85
c6c8728
 
 
 
 
 
 
 
 
 
 
 
 
 
82b18ca
 
 
 
 
 
 
 
0dbee97
82b18ca
 
 
 
 
 
 
0dbee97
82b18ca
c6c8728
 
 
 
 
 
1adfa85
 
fbb7cf7
 
 
7d4300a
 
ec8124e
 
 
7d4300a
 
c6c8728
c7187a6
f59f827
1adfa85
f59f827
1adfa85
a55fec0
 
 
 
 
1adfa85
 
f59f827
1adfa85
 
c7187a6
 
 
f5577ea
97e6589
175b024
 
 
 
 
 
 
 
 
 
 
 
 
97e6589
 
 
51a6b05
ed35c4e
51a6b05
 
ed35c4e
7d4300a
97e6589
 
 
 
ed35c4e
7d4300a
 
5fac847
7d4300a
 
5af6354
7d4300a
 
c96b30c
ef7a292
7d4300a
97e6589
7d4300a
1662e82
912de01
 
 
 
042b27f
b8a97f1
 
 
 
 
 
 
 
 
 
 
 
 
 
912de01
ad84a1c
 
 
 
c7187a6
912de01
 
 
 
c7187a6
ad84a1c
8f60615
 
 
 
 
 
 
 
 
 
 
6671f5e
 
 
 
 
ad84a1c
 
27fac96
ad84a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673c1d2
045bdb1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfc129c
 
 
 
 
 
 
 
e29a6da
 
 
 
f340c5b
e29a6da
c5dc7b7
f340c5b
 
 
 
 
 
 
 
c5dc7b7
f340c5b
 
 
c5dc7b7
e29a6da
92eb30b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f296b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673c1d2
 
857a9ad
2f296b6
 
87880d1
be36d4a
 
 
2f296b6
be36d4a
 
4c9fe98
aaf3c83
857a9ad
c7c02bf
857a9ad
bd90cfc
048be9c
86421d6
 
 
857a9ad
 
 
 
 
2f296b6
 
 
 
 
be36d4a
 
 
 
bd90cfc
2f296b6
c6c8728
af0be92
 
e9fbda8
3fba294
 
 
ce904eb
 
 
af0be92
ce904eb
 
 
 
 
af0be92
 
 
 
 
 
ce904eb
af0be92
c6c8728
3752ba6
 
 
 
 
 
 
 
 
 
 
c6c8728
44b5271
 
 
 
 
 
 
 
 
3752ba6
44b5271
c6c8728
 
 
 
 
fab6f87
c6c8728
 
 
 
 
 
 
fab6f87
c6c8728
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44b5271
 
c6c8728
 
3ef2b32
 
 
c6c8728
3752ba6
 
 
c6c8728
 
44b5271
 
215a692
 
c6c8728
3ef2b32
 
 
c6c8728
3752ba6
 
 
44b5271
c6c8728
44b5271
 
c6c8728
3ef2b32
 
 
c6c8728
3752ba6
 
 
 
 
c6c8728
 
44b5271
 
215a692
 
c6c8728
3ef2b32
c6c8728
3752ba6
 
 
c6c8728
9a5df63
82b18ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ef2b32
 
 
82b18ca
 
3ef2b32
 
 
82b18ca
 
 
 
 
3752ba6
82b18ca
 
 
 
9a5df63
 
 
b2d7f41
 
 
 
 
9a5df63
 
 
 
 
b2d7f41
9a5df63
 
b2d7f41
9a5df63
 
b2d7f41
118c5f6
2a802ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ef2b32
 
 
2a802ab
3752ba6
 
 
 
 
2a802ab
a2fd8f3
 
2d025c2
 
 
 
 
 
 
 
 
 
 
 
 
0e15dd6
2d025c2
0e86456
 
 
 
 
 
2d025c2
 
4f7e6cf
0e86456
 
 
 
 
 
2d025c2
0e15dd6
2d025c2
 
0e15dd6
 
 
af0be92
 
 
2d025c2
04454ac
 
 
 
 
 
 
 
 
 
 
 
fbd0ad8
04454ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a117981
db8bfce
 
 
 
a117981
 
22eb380
 
a117981
 
db8bfce
22eb380
 
 
 
a117981
 
 
 
 
 
 
22eb380
db8bfce
a117981
 
 
 
 
 
 
 
 
 
 
 
db8bfce
 
a117981
22eb380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db8bfce
 
 
 
 
 
 
22eb380
 
 
2d025c2
ef66f4a
a2fd8f3
 
 
 
 
 
 
2d025c2
a2fd8f3
ef66f4a
 
 
 
a2fd8f3
ef66f4a
a2fd8f3

import os
import pickle as pkl
import tempfile
import traceback
import unittest
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import sympy
from sklearn.utils.estimator_checks import check_estimator

from .. import PySRRegressor, install, jl
from ..export_latex import sympy2latex
from ..feature_selection import _handle_feature_selection, run_feature_selection
from ..julia_helpers import init_julia
from ..sr import _check_assertions, _process_constraints, idx_model_selection
from ..utils import _csv_filename_to_pkl_filename
from .params import (
    DEFAULT_NCYCLES,
    DEFAULT_NITERATIONS,
    DEFAULT_PARAMS,
    DEFAULT_POPULATIONS,
)


class TestPipeline(unittest.TestCase):
    def setUp(self):
        # Using inspect,
        # get default niterations from PySRRegressor, and double them:
        self.default_test_kwargs = dict(
            progress=False,
            model_selection="accuracy",
            niterations=DEFAULT_NITERATIONS * 2,
            populations=DEFAULT_POPULATIONS * 2,
            temp_equation_file=True,
        )
        self.rstate = np.random.RandomState(0)
        self.X = self.rstate.randn(100, 5)

    def test_linear_relation(self):
        y = self.X[:, 0]
        model = PySRRegressor(
            **self.default_test_kwargs,
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
        )
        model.fit(self.X, y)
        print(model.equations_)
        self.assertLessEqual(model.get_best()["loss"], 1e-4)

    def test_linear_relation_named(self):
        y = self.X[:, 0]
        model = PySRRegressor(
            **self.default_test_kwargs,
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
        )
        model.fit(self.X, y, variable_names=["c1", "c2", "c3", "c4", "c5"])
        self.assertIn("c1", model.equations_.iloc[-1]["equation"])

    def test_linear_relation_weighted(self):
        y = self.X[:, 0]
        weights = np.ones_like(y)
        model = PySRRegressor(
            **self.default_test_kwargs,
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
        )
        model.fit(self.X, y, weights=weights)
        print(model.equations_)
        self.assertLessEqual(model.get_best()["loss"], 1e-4)

    def test_multiprocessing_turbo_custom_objective(self):
        rstate = np.random.RandomState(0)
        y = self.X[:, 0]
        y += rstate.randn(*y.shape) * 1e-4
        model = PySRRegressor(
            **self.default_test_kwargs,
            # Turbo needs to work with unsafe operators:
            unary_operators=["sqrt"],
            procs=2,
            multithreading=False,
            turbo=True,
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-10 && complexity == 1",
            loss_function="""
            function my_objective(tree::Node{T}, dataset::Dataset{T}, options::Options) where T
                prediction, flag = eval_tree_array(tree, dataset.X, options)
                !flag && return T(Inf)
                abs3(x) = abs(x) ^ 3
                return sum(abs3, prediction .- dataset.y) / length(prediction)
            end
            """,
        )
        model.fit(self.X, y)
        print(model.equations_)
        best_loss = model.equations_.iloc[-1]["loss"]
        self.assertLessEqual(best_loss, 1e-10)
        self.assertGreaterEqual(best_loss, 0.0)

        # Test options stored:
        self.assertEqual(model.julia_options_.turbo, True)

    def test_multiline_seval(self):
        # The user should be able to run multiple things in a single seval call:
        num = jl.seval(
            """
            function my_new_objective(x)
                x^2
            end
            1.5
        """
        )
        self.assertEqual(num, 1.5)

    def test_high_precision_search_custom_loss(self):
        y = 1.23456789 * self.X[:, 0]
        model = PySRRegressor(
            **self.default_test_kwargs,
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 3",
            elementwise_loss="my_loss(prediction, target) = (prediction - target)^2",
            precision=64,
            parsimony=0.01,
            warm_start=True,
        )
        model.fit(self.X, y)

        # We should have that the model state is now a Float64 hof:
        test_state = model.raw_julia_state_
        self.assertTrue(jl.typeof(test_state[1]).parameters[1] == jl.Float64)

        # Test options stored:
        self.assertEqual(model.julia_options_.turbo, False)

    def test_multioutput_custom_operator_quiet_custom_complexity(self):
        y = self.X[:, [0, 1]] ** 2
        model = PySRRegressor(
            unary_operators=["square_op(x) = x^2"],
            extra_sympy_mappings={"square_op": lambda x: x**2},
            complexity_of_operators={"square_op": 2, "plus": 1},
            binary_operators=["plus"],
            verbosity=0,
            **self.default_test_kwargs,
            procs=0,
            # Test custom operators with turbo:
            turbo=True,
            # Test custom operators with constraints:
            nested_constraints={"square_op": {"square_op": 3}},
            constraints={"square_op": 10},
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 3",
        )
        model.fit(self.X, y)
        equations = model.equations_
        print(equations)
        self.assertIn("square_op", model.equations_[0].iloc[-1]["equation"])
        self.assertLessEqual(equations[0].iloc[-1]["loss"], 1e-4)
        self.assertLessEqual(equations[1].iloc[-1]["loss"], 1e-4)

        test_y1 = model.predict(self.X)
        test_y2 = model.predict(self.X, index=[-1, -1])

        mse1 = np.average((test_y1 - y) ** 2)
        mse2 = np.average((test_y2 - y) ** 2)

        self.assertLessEqual(mse1, 1e-4)
        self.assertLessEqual(mse2, 1e-4)

        bad_y = model.predict(self.X, index=[0, 0])
        bad_mse = np.average((bad_y - y) ** 2)
        self.assertGreater(bad_mse, 1e-4)

    def test_multioutput_weighted_with_callable_temp_equation(self):
        X = self.X.copy()
        y = X[:, [0, 1]] ** 2
        w = self.rstate.rand(*y.shape)
        w[w < 0.5] = 0.0
        w[w >= 0.5] = 1.0

        # Double equation when weights are 0:
        y = (2 - w) * y
        # Thus, pysr needs to use the weights to find the right equation!

        model = PySRRegressor(
            unary_operators=["sq(x) = x^2"],
            binary_operators=["plus"],
            extra_sympy_mappings={"sq": lambda x: x**2},
            **self.default_test_kwargs,
            procs=0,
            delete_tempfiles=False,
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 2",
        )
        model.fit(X.copy(), y, weights=w)

        # These tests are flaky, so don't fail test:
        try:
            np.testing.assert_almost_equal(
                model.predict(X.copy())[:, 0], X[:, 0] ** 2, decimal=3
            )
        except AssertionError:
            print("Error in test_multioutput_weighted_with_callable_temp_equation")
            print("Model equations: ", model.sympy()[0])
            print("True equation: x0^2")

        try:
            np.testing.assert_almost_equal(
                model.predict(X.copy())[:, 1], X[:, 1] ** 2, decimal=3
            )
        except AssertionError:
            print("Error in test_multioutput_weighted_with_callable_temp_equation")
            print("Model equations: ", model.sympy()[1])
            print("True equation: x1^2")

    def test_complex_equations_anonymous_stop(self):
        X = self.rstate.randn(100, 3) + 1j * self.rstate.randn(100, 3)
        y = (2 + 1j) * np.cos(X[:, 0] * (0.5 - 0.3j))
        model = PySRRegressor(
            binary_operators=["+", "-", "*"],
            unary_operators=["cos"],
            **self.default_test_kwargs,
            early_stop_condition="(loss, complexity) -> loss <= 1e-4 && complexity <= 6",
        )
        model.niterations = DEFAULT_NITERATIONS * 10
        model.fit(X, y)
        test_y = model.predict(X)
        self.assertTrue(np.issubdtype(test_y.dtype, np.complexfloating))
        self.assertLessEqual(np.average(np.abs(test_y - y) ** 2), 1e-4)

    def test_empty_operators_single_input_warm_start(self):
        X = self.rstate.randn(100, 1)
        y = X[:, 0] + 3.0
        regressor = PySRRegressor(
            unary_operators=[],
            binary_operators=["plus"],
            **self.default_test_kwargs,
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 3",
        )
        self.assertTrue("None" in regressor.__repr__())
        regressor.fit(X, y)
        self.assertTrue("None" not in regressor.__repr__())
        self.assertTrue(">>>>" in regressor.__repr__())

        self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
        np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)

        # Test if repeated fit works:
        regressor.set_params(
            niterations=1,
            ncycles_per_iteration=2,
            warm_start=True,
            early_stop_condition=None,
        )

        # We should have that the model state is now a Float32 hof:
        test_state = regressor.julia_state_
        self.assertTrue(
            jl.first(jl.typeof(jl.last(test_state)).parameters) == jl.Float32
        )

        # This should exit almost immediately, and use the old equations
        regressor.fit(X, y)

        self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
        np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)

        # Tweak model selection:
        regressor.set_params(model_selection="best")
        self.assertEqual(regressor.get_params()["model_selection"], "best")
        self.assertTrue("None" not in regressor.__repr__())
        self.assertTrue(">>>>" in regressor.__repr__())

    def test_warm_start_set_at_init(self):
        # Smoke test for bug where warm_start=True is set at init
        y = self.X[:, 0]
        regressor = PySRRegressor(warm_start=True, max_evals=10)
        regressor.fit(self.X, y)

    def test_noisy(self):
        y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
        model = PySRRegressor(
            # Test that passing a single operator works:
            unary_operators="sq(x) = x^2",
            binary_operators="plus",
            extra_sympy_mappings={"sq": lambda x: x**2},
            **self.default_test_kwargs,
            procs=0,
            denoise=True,
            early_stop_condition="stop_if(loss, complexity) = loss < 0.05 && complexity == 2",
        )
        # We expect in this case that the "best"
        # equation should be the right one:
        model.set_params(model_selection="best")
        # Also try without a temp equation file:
        model.set_params(temp_equation_file=False)
        model.fit(self.X, y)
        self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
        self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)

    def test_pandas_resample_with_nested_constraints(self):
        X = pd.DataFrame(
            {
                "T": self.rstate.randn(500),
                "x": self.rstate.randn(500),
                "unused_feature": self.rstate.randn(500),
            }
        )
        true_fn = lambda x: np.array(x["T"] + x["x"] ** 2 + 1.323837)
        y = true_fn(X)
        noise = self.rstate.randn(500) * 0.01
        y = y + noise
        # We also test y as a pandas array:
        y = pd.Series(y)
        # Resampled array is a different order of features:
        Xresampled = pd.DataFrame(
            {
                "unused_feature": self.rstate.randn(100),
                "x": self.rstate.randn(100),
                "T": self.rstate.randn(100),
            }
        )
        model = PySRRegressor(
            unary_operators=[],
            binary_operators=["+", "*", "/", "-"],
            **self.default_test_kwargs,
            denoise=True,
            nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}},
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-3 && complexity == 7",
        )
        model.fit(X, y, Xresampled=Xresampled)
        self.assertNotIn("unused_feature", model.latex())
        self.assertIn("T", model.latex())
        self.assertIn("x", model.latex())
        self.assertLessEqual(model.get_best()["loss"], 1e-1)
        fn = model.get_best()["lambda_format"]
        X2 = pd.DataFrame(
            {
                "T": self.rstate.randn(100),
                "unused_feature": self.rstate.randn(100),
                "x": self.rstate.randn(100),
            }
        )
        self.assertLess(np.average((fn(X2) - true_fn(X2)) ** 2), 1e-1)
        self.assertLess(np.average((model.predict(X2) - true_fn(X2)) ** 2), 1e-1)

    def test_high_dim_selection_early_stop(self):
        X = pd.DataFrame({f"k{i}": self.rstate.randn(10000) for i in range(10)})
        Xresampled = pd.DataFrame({f"k{i}": self.rstate.randn(100) for i in range(10)})
        y = X["k7"] ** 2 + np.cos(X["k9"]) * 3

        model = PySRRegressor(
            unary_operators=["cos"],
            select_k_features=3,
            early_stop_condition=1e-4,  # Stop once most accurate equation is <1e-4 MSE
            maxsize=12,
            **self.default_test_kwargs,
        )
        model.set_params(model_selection="accuracy")
        model.fit(X, y, Xresampled=Xresampled)
        self.assertLess(np.average((model.predict(X) - y) ** 2), 1e-4)
        # Again, but with numpy arrays:
        model.fit(X.values, y.values, Xresampled=Xresampled.values)
        self.assertLess(np.average((model.predict(X.values) - y.values) ** 2), 1e-4)

    def test_load_model(self):
        """See if we can load a ran model from the equation file."""
        csv_file_data = """
        Complexity,Loss,Equation
        1,0.19951081,"1.9762075"
        3,0.12717344,"(f0 + 1.4724599)"
        4,0.104823045,"pow_abs(2.2683423, cos(f3))\""""
        # Strip the indents:
        csv_file_data = "\n".join([l.strip() for l in csv_file_data.split("\n")])

        for from_backup in [False, True]:
            rand_dir = Path(tempfile.mkdtemp())
            equation_filename = str(rand_dir / "equation.csv")
            with open(equation_filename + (".bkup" if from_backup else ""), "w") as f:
                f.write(csv_file_data)
            model = PySRRegressor.from_file(
                equation_filename,
                n_features_in=5,
                feature_names_in=["f0", "f1", "f2", "f3", "f4"],
                binary_operators=["+", "*", "/", "-", "^"],
                unary_operators=["cos"],
            )
            X = self.rstate.rand(100, 5)
            y_truth = 2.2683423 ** np.cos(X[:, 3])
            y_test = model.predict(X, 2)

            np.testing.assert_allclose(y_truth, y_test)

    def test_load_model_simple(self):
        # Test that we can simply load a model from its equation file.
        y = self.X[:, [0, 1]] ** 2
        model = PySRRegressor(
            # Test that passing a single operator works:
            unary_operators="sq(x) = x^2",
            binary_operators="plus",
            extra_sympy_mappings={"sq": lambda x: x**2},
            **self.default_test_kwargs,
            procs=0,
            denoise=True,
            early_stop_condition="stop_if(loss, complexity) = loss < 0.05 && complexity == 2",
        )
        rand_dir = Path(tempfile.mkdtemp())
        equation_file = rand_dir / "equations.csv"
        model.set_params(temp_equation_file=False)
        model.set_params(equation_file=equation_file)
        model.fit(self.X, y)

        # lambda functions are removed from the pickling, so we need
        # to pass it during the loading:
        model2 = PySRRegressor.from_file(
            model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
        )

        np.testing.assert_allclose(model.predict(self.X), model2.predict(self.X))

        # Try again, but using only the pickle file:
        for file_to_delete in [str(equation_file), str(equation_file) + ".bkup"]:
            if os.path.exists(file_to_delete):
                os.remove(file_to_delete)

        pickle_file = rand_dir / "equations.pkl"
        model3 = PySRRegressor.from_file(
            model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
        )
        np.testing.assert_allclose(model.predict(self.X), model3.predict(self.X))


def manually_create_model(equations, feature_names=None):
    if feature_names is None:
        feature_names = ["x0", "x1"]

    model = PySRRegressor(
        progress=False,
        niterations=1,
        extra_sympy_mappings={},
        output_jax_format=False,
        model_selection="accuracy",
        equation_file="equation_file.csv",
    )

    # Set up internal parameters as if it had been fitted:
    if isinstance(equations, list):
        # Multi-output.
        model.equation_file_ = "equation_file.csv"
        model.nout_ = len(equations)
        model.selection_mask_ = None
        model.feature_names_in_ = np.array(feature_names, dtype=object)
        for i in range(model.nout_):
            equations[i]["complexity loss equation".split(" ")].to_csv(
                f"equation_file.csv.out{i+1}.bkup"
            )
    else:
        model.equation_file_ = "equation_file.csv"
        model.nout_ = 1
        model.selection_mask_ = None
        model.feature_names_in_ = np.array(feature_names, dtype=object)
        equations["complexity loss equation".split(" ")].to_csv(
            "equation_file.csv.bkup"
        )

    model.refresh()

    return model


class TestBest(unittest.TestCase):
    def setUp(self):
        self.rstate = np.random.RandomState(0)
        self.X = self.rstate.randn(10, 2)
        self.y = np.cos(self.X[:, 0]) ** 2
        equations = pd.DataFrame(
            {
                "equation": ["1.0", "cos(x0)", "square(cos(x0))"],
                "loss": [1.0, 0.1, 1e-5],
                "complexity": [1, 2, 3],
            }
        )
        self.model = manually_create_model(equations)
        self.equations_ = self.model.equations_

    def test_best(self):
        self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)

    def test_index_selection(self):
        self.assertEqual(self.model.sympy(-1), sympy.cos(sympy.Symbol("x0")) ** 2)
        self.assertEqual(self.model.sympy(2), sympy.cos(sympy.Symbol("x0")) ** 2)
        self.assertEqual(self.model.sympy(1), sympy.cos(sympy.Symbol("x0")))
        self.assertEqual(self.model.sympy(0), 1.0)

    def test_best_tex(self):
        self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")

    def test_best_lambda(self):
        X = self.X
        y = self.y
        for f in [self.model.predict, self.equations_.iloc[-1]["lambda_format"]]:
            np.testing.assert_almost_equal(f(X), y, decimal=3)

    def test_all_selection_strategies(self):
        equations = pd.DataFrame(
            dict(
                loss=[1.0, 0.1, 0.01, 0.001 * 1.4, 0.001],
                score=[0.5, 1.0, 0.5, 0.5, 0.3],
            )
        )
        idx_accuracy = idx_model_selection(equations, "accuracy")
        self.assertEqual(idx_accuracy, 4)
        idx_best = idx_model_selection(equations, "best")
        self.assertEqual(idx_best, 3)
        idx_score = idx_model_selection(equations, "score")
        self.assertEqual(idx_score, 1)


class TestFeatureSelection(unittest.TestCase):
    def setUp(self):
        self.rstate = np.random.RandomState(0)

    def test_feature_selection(self):
        X = self.rstate.randn(20000, 5)
        y = X[:, 2] ** 2 + X[:, 3] ** 2
        selected = run_feature_selection(X, y, select_k_features=2)
        self.assertEqual(sorted(selected), [2, 3])

    def test_feature_selection_handler(self):
        X = self.rstate.randn(20000, 5)
        y = X[:, 2] ** 2 + X[:, 3] ** 2
        var_names = [f"x{i}" for i in range(5)]
        selected_X, selection = _handle_feature_selection(
            X,
            select_k_features=2,
            variable_names=var_names,
            y=y,
        )
        self.assertTrue((2 in selection) and (3 in selection))
        selected_var_names = [var_names[i] for i in selection]
        self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
        np.testing.assert_array_equal(
            np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
        )


class TestMiscellaneous(unittest.TestCase):
    """Test miscellaneous functions."""

    def test_csv_to_pkl_conversion(self):
        """Test that csv filename to pkl filename works as expected."""
        tmpdir = Path(tempfile.mkdtemp())
        equation_file = tmpdir / "equations.389479384.28378374.csv"
        expected_pkl_file = tmpdir / "equations.389479384.28378374.pkl"

        # First, test inputting the paths:
        test_pkl_file = _csv_filename_to_pkl_filename(equation_file)
        self.assertEqual(test_pkl_file, str(expected_pkl_file))

        # Next, test inputting the strings.
        test_pkl_file = _csv_filename_to_pkl_filename(str(equation_file))
        self.assertEqual(test_pkl_file, str(expected_pkl_file))

    def test_deprecation(self):
        """Ensure that deprecation works as expected.

        This should give a warning, and sets the correct value.
        """
        with self.assertWarns(FutureWarning):
            model = PySRRegressor(fractionReplaced=0.2)
        # This is a deprecated parameter, so we should get a warning.

        # The correct value should be set:
        self.assertEqual(model.fraction_replaced, 0.2)

    def test_deprecated_functions(self):
        with self.assertWarns(FutureWarning):
            install()

        _jl = None

        with self.assertWarns(FutureWarning):
            _jl = init_julia()

        self.assertEqual(_jl, jl)

    def test_power_law_warning(self):
        """Ensure that a warning is given for a power law operator."""
        with self.assertWarns(UserWarning):
            _process_constraints(["^"], [], {})

    def test_size_warning(self):
        """Ensure that a warning is given for a large input size."""
        model = PySRRegressor()
        X = np.random.randn(10001, 2)
        y = np.random.randn(10001)
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            with self.assertRaises(Exception) as context:
                model.fit(X, y)
            self.assertIn("more than 10,000", str(context.exception))

    def test_feature_warning(self):
        """Ensure that a warning is given for large number of features."""
        model = PySRRegressor()
        X = np.random.randn(100, 10)
        y = np.random.randn(100)
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            with self.assertRaises(Exception) as context:
                model.fit(X, y)
            self.assertIn("with 10 features or more", str(context.exception))

    def test_deterministic_warnings(self):
        """Ensure that warnings are given for determinism"""
        model = PySRRegressor(random_state=0)
        X = np.random.randn(100, 2)
        y = np.random.randn(100)
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            with self.assertRaises(Exception) as context:
                model.fit(X, y)
            self.assertIn("`deterministic`", str(context.exception))

    def test_deterministic_errors(self):
        """Setting deterministic without random_state should error"""
        model = PySRRegressor(deterministic=True)
        X = np.random.randn(100, 2)
        y = np.random.randn(100)
        with self.assertRaises(ValueError):
            model.fit(X, y)

    def test_extra_sympy_mappings_undefined(self):
        """extra_sympy_mappings=None errors for custom operators"""
        model = PySRRegressor(unary_operators=["square2(x) = x^2"])
        X = np.random.randn(100, 2)
        y = np.random.randn(100)
        with self.assertRaises(ValueError):
            model.fit(X, y)

    def test_sympy_function_fails_as_variable(self):
        model = PySRRegressor()
        X = np.random.randn(100, 2)
        y = np.random.randn(100)
        with self.assertRaises(ValueError) as cm:
            model.fit(X, y, variable_names=["x1", "N"])
        self.assertIn("Variable name", str(cm.exception))

    def test_bad_variable_names_fail(self):
        model = PySRRegressor()
        X = np.random.randn(100, 1)
        y = np.random.randn(100)

        with self.assertRaises(ValueError) as cm:
            model.fit(X, y, variable_names=["Tr(Tij)"])
        self.assertIn("Invalid variable name", str(cm.exception))

        with self.assertRaises(ValueError) as cm:
            model.fit(X, y, variable_names=["f{c}"])
        self.assertIn("Invalid variable name", str(cm.exception))

    def test_bad_kwargs(self):
        bad_kwargs = [
            dict(
                kwargs=dict(
                    elementwise_loss="g(x, y) = 0.0", loss_function="f(*args) = 0.0"
                ),
                error=ValueError,
            ),
            dict(
                kwargs=dict(maxsize=3),
                error=ValueError,
            ),
            dict(
                kwargs=dict(tournament_selection_n=10, population_size=3),
                error=ValueError,
            ),
            dict(
                kwargs=dict(optimizer_algorithm="COBYLA"),
                error=NotImplementedError,
            ),
            dict(
                kwargs=dict(
                    constraints={
                        "+": (3, 5),
                    }
                ),
                error=NotImplementedError,
            ),
            dict(
                kwargs=dict(binary_operators=["α(x, y) = x - y"]),
                error=ValueError,
            ),
            dict(
                kwargs=dict(model_selection="unknown"),
                error=NotImplementedError,
            ),
        ]
        for opt in bad_kwargs:
            model = PySRRegressor(**opt["kwargs"], niterations=1)
            with self.assertRaises(opt["error"]):
                model.fit([[1]], [1])
                model.get_best()
                print("Failed", opt["kwargs"])

    def test_pickle_with_temp_equation_file(self):
        """If we have a temporary equation file, unpickle the estimator."""
        model = PySRRegressor(
            populations=int(1 + DEFAULT_POPULATIONS / 5),
            temp_equation_file=True,
            procs=0,
            multithreading=False,
        )
        nout = 3
        X = np.random.randn(100, 2)
        y = np.random.randn(100, nout)
        model.fit(X, y)
        contents = model.equation_file_contents_.copy()

        y_predictions = model.predict(X)

        equation_file_base = model.equation_file_
        for i in range(1, nout + 1):
            assert not os.path.exists(str(equation_file_base) + f".out{i}.bkup")

        with tempfile.NamedTemporaryFile() as pickle_file:
            pkl.dump(model, pickle_file)
            pickle_file.seek(0)
            model2 = pkl.load(pickle_file)

        contents2 = model2.equation_file_contents_
        cols_to_check = ["equation", "loss", "complexity"]
        for frame1, frame2 in zip(contents, contents2):
            pd.testing.assert_frame_equal(frame1[cols_to_check], frame2[cols_to_check])

        y_predictions2 = model2.predict(X)
        np.testing.assert_array_equal(y_predictions, y_predictions2)

    def test_scikit_learn_compatibility(self):
        """Test PySRRegressor compatibility with scikit-learn."""
        model = PySRRegressor(
            niterations=int(1 + DEFAULT_NITERATIONS / 10),
            populations=int(1 + DEFAULT_POPULATIONS / 3),
            ncycles_per_iteration=int(2 + DEFAULT_NCYCLES / 10),
            verbosity=0,
            progress=False,
            random_state=0,
            deterministic=True,  # Deterministic as tests require this.
            procs=0,
            multithreading=False,
            warm_start=False,
            temp_equation_file=True,
        )  # Return early.

        check_generator = check_estimator(model, generate_only=True)
        exception_messages = []
        for _, check in check_generator:
            if check.func.__name__ == "check_complex_data":
                # We can use complex data, so avoid this check.
                continue
            try:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    check(model)
                print("Passed", check.func.__name__)
            except Exception:
                error_message = str(traceback.format_exc())
                exception_messages.append(
                    f"{check.func.__name__}:\n" + error_message + "\n"
                )
                print("Failed", check.func.__name__, "with:")
                # Add a leading tab to error message, which
                # might be multi-line:
                print("\n".join([(" " * 4) + row for row in error_message.split("\n")]))
        # If any checks failed don't let the test pass.
        self.assertEqual(len(exception_messages), 0)

    def test_param_groupings(self):
        """Test that param_groupings are complete"""
        param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
        if not param_groupings_file.exists():
            return

        # Read the file, discarding lines ending in ":",
        # and removing leading "\s*-\s*":
        params = []
        with open(param_groupings_file, "r") as f:
            for line in f.readlines():
                if line.strip().endswith(":"):
                    continue
                if line.strip().startswith("-"):
                    params.append(line.strip()[1:].strip())

        regressor_params = [
            p for p in DEFAULT_PARAMS.keys() if p not in ["self", "kwargs"]
        ]

        # Check the sets are equal:
        self.assertSetEqual(set(params), set(regressor_params))


TRUE_PREAMBLE = "\n".join(
    [
        r"\usepackage{breqn}",
        r"\usepackage{booktabs}",
        "",
        "...",
        "",
    ]
)


class TestLaTeXTable(unittest.TestCase):
    def setUp(self):
        equations = pd.DataFrame(
            dict(
                equation=["x0", "cos(x0)", "x0 + x1 - cos(x1 * x0)"],
                loss=[1.052, 0.02315, 1.12347e-15],
                complexity=[1, 2, 8],
            )
        )
        self.model = manually_create_model(equations)
        self.maxDiff = None

    def create_true_latex(self, middle_part, include_score=False):
        if include_score:
            true_latex_table_str = r"""
                \begin{table}[h]
                \begin{center}
                \begin{tabular}{@{}cccc@{}}
                \toprule
                Equation & Complexity & Loss & Score \\
                \midrule"""
        else:
            true_latex_table_str = r"""
                \begin{table}[h]
                \begin{center}
                \begin{tabular}{@{}ccc@{}}
                \toprule
                Equation & Complexity & Loss \\
                \midrule"""
        true_latex_table_str += middle_part
        true_latex_table_str += r"""\bottomrule
            \end{tabular}
            \end{center}
            \end{table}
        """
        # First, remove empty lines:
        true_latex_table_str = "\n".join(
            [line.strip() for line in true_latex_table_str.split("\n") if len(line) > 0]
        )
        return true_latex_table_str.strip()

    def test_simple_table(self):
        latex_table_str = self.model.latex_table(
            columns=["equation", "complexity", "loss"]
        )
        middle_part = r"""
            $y = x_{0}$ & $1$ & $1.05$ \\
            $y = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ \\
            $y = x_{0} + x_{1} - \cos{\left(x_{0} x_{1} \right)}$ & $8$ & $1.12 \cdot 10^{-15}$ \\
        """
        true_latex_table_str = (
            TRUE_PREAMBLE + "\n" + self.create_true_latex(middle_part)
        )
        self.assertEqual(latex_table_str, true_latex_table_str)

    def test_other_precision(self):
        latex_table_str = self.model.latex_table(
            precision=5, columns=["equation", "complexity", "loss"]
        )
        middle_part = r"""
            $y = x_{0}$ & $1$ & $1.0520$ \\
            $y = \cos{\left(x_{0} \right)}$ & $2$ & $0.023150$ \\
            $y = x_{0} + x_{1} - \cos{\left(x_{0} x_{1} \right)}$ & $8$ & $1.1235 \cdot 10^{-15}$ \\
        """
        true_latex_table_str = (
            TRUE_PREAMBLE + "\n" + self.create_true_latex(middle_part)
        )
        self.assertEqual(latex_table_str, true_latex_table_str)

    def test_include_score(self):
        latex_table_str = self.model.latex_table()
        middle_part = r"""
            $y = x_{0}$ & $1$ & $1.05$ & $0.0$ \\
            $y = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ & $3.82$ \\
            $y = x_{0} + x_{1} - \cos{\left(x_{0} x_{1} \right)}$ & $8$ & $1.12 \cdot 10^{-15}$ & $5.11$ \\
        """
        true_latex_table_str = (
            TRUE_PREAMBLE
            + "\n"
            + self.create_true_latex(middle_part, include_score=True)
        )
        self.assertEqual(latex_table_str, true_latex_table_str)

    def test_last_equation(self):
        latex_table_str = self.model.latex_table(
            indices=[2], columns=["equation", "complexity", "loss"]
        )
        middle_part = r"""
            $y = x_{0} + x_{1} - \cos{\left(x_{0} x_{1} \right)}$ & $8$ & $1.12 \cdot 10^{-15}$ \\
        """
        true_latex_table_str = (
            TRUE_PREAMBLE + "\n" + self.create_true_latex(middle_part)
        )
        self.assertEqual(latex_table_str, true_latex_table_str)

    def test_multi_output(self):
        equations1 = pd.DataFrame(
            dict(
                equation=["x0", "cos(x0)", "x0 + x1 - cos(x1 * x0)"],
                loss=[1.052, 0.02315, 1.12347e-15],
                complexity=[1, 2, 8],
            )
        )
        equations2 = pd.DataFrame(
            dict(
                equation=["x1", "cos(x1)", "x0 * x0 * x1"],
                loss=[1.32, 0.052, 2e-15],
                complexity=[1, 2, 5],
            )
        )
        equations = [equations1, equations2]
        model = manually_create_model(equations)
        middle_part_1 = r"""
            $y_{0} = x_{0}$ & $1$ & $1.05$ & $0.0$ \\
            $y_{0} = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ & $3.82$ \\
            $y_{0} = x_{0} + x_{1} - \cos{\left(x_{0} x_{1} \right)}$ & $8$ & $1.12 \cdot 10^{-15}$ & $5.11$ \\
        """
        middle_part_2 = r"""
            $y_{1} = x_{1}$ & $1$ & $1.32$ & $0.0$ \\
            $y_{1} = \cos{\left(x_{1} \right)}$ & $2$ & $0.0520$ & $3.23$ \\
            $y_{1} = x_{0}^{2} x_{1}$ & $5$ & $2.00 \cdot 10^{-15}$ & $10.3$ \\
        """
        true_latex_table_str = "\n\n".join(
            self.create_true_latex(part, include_score=True)
            for part in [middle_part_1, middle_part_2]
        )
        true_latex_table_str = TRUE_PREAMBLE + "\n" + true_latex_table_str
        latex_table_str = model.latex_table()

        self.assertEqual(latex_table_str, true_latex_table_str)

    def test_latex_float_precision(self):
        """Test that we can print latex expressions with custom precision"""
        expr = sympy.Float(4583.4485748, dps=50)
        self.assertEqual(sympy2latex(expr, prec=6), r"4583.45")
        self.assertEqual(sympy2latex(expr, prec=5), r"4583.4")
        self.assertEqual(sympy2latex(expr, prec=4), r"4583.")
        self.assertEqual(sympy2latex(expr, prec=3), r"4.58 \cdot 10^{3}")
        self.assertEqual(sympy2latex(expr, prec=2), r"4.6 \cdot 10^{3}")

        # Multiple numbers:
        x = sympy.Symbol("x")
        expr = x * 3232.324857384 - 1.4857485e-10
        self.assertEqual(
            sympy2latex(expr, prec=2), r"3.2 \cdot 10^{3} x - 1.5 \cdot 10^{-10}"
        )
        self.assertEqual(
            sympy2latex(expr, prec=3), r"3.23 \cdot 10^{3} x - 1.49 \cdot 10^{-10}"
        )
        self.assertEqual(
            sympy2latex(expr, prec=8), r"3232.3249 x - 1.4857485 \cdot 10^{-10}"
        )

    def test_latex_break_long_equation(self):
        """Test that we can break a long equation inside the table"""
        long_equation = """
        - cos(x1 * x0) + 3.2 * x0 - 1.2 * x1 + x1 * x1 * x1 + x0 * x0 * x0
        + 5.2 * sin(0.3256 * sin(x2) - 2.6 * x0) + x0 * x0 * x0 * x0 * x0
        + cos(cos(x1 * x0) + 3.2 * x0 - 1.2 * x1 + x1 * x1 * x1 + x0 * x0 * x0)
        """
        long_equation = "".join(long_equation.split("\n")).strip()
        equations = pd.DataFrame(
            dict(
                equation=["x0", "cos(x0)", long_equation],
                loss=[1.052, 0.02315, 1.12347e-15],
                complexity=[1, 2, 30],
            )
        )
        model = manually_create_model(equations)
        latex_table_str = model.latex_table()
        middle_part = r"""
        $y = x_{0}$ & $1$ & $1.05$ & $0.0$ \\
        $y = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ & $3.82$ \\
        \begin{minipage}{0.8\linewidth} \vspace{-1em} \begin{dmath*} y = x_{0}^{5} + x_{0}^{3} + 3.20 x_{0} + x_{1}^{3} - 1.20 x_{1} - 5.20 \sin{\left(2.60 x_{0} - 0.326 \sin{\left(x_{2} \right)} \right)} - \cos{\left(x_{0} x_{1} \right)} + \cos{\left(x_{0}^{3} + 3.20 x_{0} + x_{1}^{3} - 1.20 x_{1} + \cos{\left(x_{0} x_{1} \right)} \right)} \end{dmath*} \end{minipage} & $30$ & $1.12 \cdot 10^{-15}$ & $1.09$ \\
        """
        true_latex_table_str = (
            TRUE_PREAMBLE
            + "\n"
            + self.create_true_latex(middle_part, include_score=True)
        )
        self.assertEqual(latex_table_str, true_latex_table_str)


class TestDimensionalConstraints(unittest.TestCase):
    def setUp(self):
        self.default_test_kwargs = dict(
            progress=False,
            model_selection="accuracy",
            niterations=DEFAULT_NITERATIONS * 2,
            populations=DEFAULT_POPULATIONS * 2,
            temp_equation_file=True,
        )
        self.rstate = np.random.RandomState(0)
        self.X = self.rstate.randn(100, 5)

    def test_dimensional_constraints(self):
        y = np.cos(self.X[:, [0, 1]])
        model = PySRRegressor(
            binary_operators=[
                "my_add(x, y) = x + y",
                "my_sub(x, y) = x - y",
                "my_mul(x, y) = x * y",
            ],
            unary_operators=["my_cos(x) = cos(x)"],
            **self.default_test_kwargs,
            early_stop_condition=1e-8,
            select_k_features=3,
            extra_sympy_mappings={
                "my_cos": sympy.cos,
                "my_add": lambda x, y: x + y,
                "my_sub": lambda x, y: x - y,
                "my_mul": lambda x, y: x * y,
            },
        )
        model.fit(self.X, y, X_units=["m", "m", "m", "m", "m"], y_units=["m", "m"])

        # The best expression should have complexity larger than just 2:
        for i in range(2):
            self.assertGreater(model.get_best()[i]["complexity"], 2)
            self.assertLess(model.get_best()[i]["loss"], 1e-6)
            self.assertGreater(
                model.equations_[i].query("complexity <= 2").loss.min(), 1e-6
            )

    def test_unit_checks(self):
        """This just checks the number of units passed"""
        use_custom_variable_names = False
        variable_names = None
        weights = None
        args = (use_custom_variable_names, variable_names, weights)
        valid_units = [
            (np.ones((10, 2)), np.ones(10), ["m/s", "s"], "m"),
            (np.ones((10, 1)), np.ones(10), ["m/s"], None),
            (np.ones((10, 1)), np.ones(10), None, "m/s"),
            (np.ones((10, 1)), np.ones(10), None, ["m/s"]),
            (np.ones((10, 1)), np.ones((10, 1)), None, ["m/s"]),
            (np.ones((10, 1)), np.ones((10, 2)), None, ["m/s", ""]),
        ]
        for X, y, X_units, y_units in valid_units:
            _check_assertions(
                X,
                *args,
                y,
                X_units,
                y_units,
            )
        invalid_units = [
            (np.ones((10, 2)), np.ones(10), ["m/s", "s", "s^2"], None),
            (np.ones((10, 2)), np.ones(10), ["m/s", "s", "s^2"], "m"),
            (np.ones((10, 2)), np.ones((10, 2)), ["m/s", "s"], ["m"]),
            (np.ones((10, 1)), np.ones((10, 1)), "m/s", ["m"]),
        ]
        for X, y, X_units, y_units in invalid_units:
            with self.assertRaises(ValueError):
                _check_assertions(
                    X,
                    *args,
                    y,
                    X_units,
                    y_units,
                )

    def test_unit_propagation(self):
        """Check that units are propagated correctly.

        This also tests that variables have the correct names.
        """
        X = np.ones((100, 3))
        y = np.ones((100, 1))
        temp_dir = Path(tempfile.mkdtemp())
        equation_file = str(temp_dir / "equation_file.csv")
        model = PySRRegressor(
            binary_operators=["+", "*"],
            early_stop_condition="(l, c) -> l < 1e-6 && c == 3",
            progress=False,
            model_selection="accuracy",
            niterations=DEFAULT_NITERATIONS * 2,
            populations=DEFAULT_POPULATIONS * 2,
            complexity_of_constants=10,
            weight_mutate_constant=0.0,
            should_optimize_constants=False,
            multithreading=False,
            deterministic=True,
            procs=0,
            random_state=0,
            equation_file=equation_file,
            warm_start=True,
        )
        model.fit(
            X,
            y,
            X_units=["m", "s", "A"],
            y_units=["m*A"],
        )
        best = model.get_best()
        self.assertIn("x0", best["equation"])
        self.assertNotIn("x1", best["equation"])
        self.assertIn("x2", best["equation"])
        self.assertEqual(best["complexity"], 3)
        self.assertEqual(model.equations_.iloc[0].complexity, 1)
        self.assertGreater(model.equations_.iloc[0].loss, 1e-6)

        # With pkl file:
        pkl_file = str(temp_dir / "equation_file.pkl")
        model2 = PySRRegressor.from_file(pkl_file)
        best2 = model2.get_best()
        self.assertIn("x0", best2["equation"])

        # From csv file alone (we need to delete pkl file:)
        # First, we delete the pkl file:
        os.remove(pkl_file)
        model3 = PySRRegressor.from_file(
            equation_file, binary_operators=["+", "*"], n_features_in=X.shape[1]
        )
        best3 = model3.get_best()
        self.assertIn("x0", best3["equation"])

        # Try warm start, but with no units provided (should
        # be a different dataset, and thus different result):
        model.fit(X, y)
        model.early_stop_condition = "(l, c) -> l < 1e-6 && c == 1"
        self.assertEqual(model.equations_.iloc[0].complexity, 1)
        self.assertLess(model.equations_.iloc[0].loss, 1e-6)


# TODO: Determine desired behavior if second .fit() call does not have units


def runtests(just_tests=False):
    """Run all tests in test.py."""
    test_cases = [
        TestPipeline,
        TestBest,
        TestFeatureSelection,
        TestMiscellaneous,
        TestLaTeXTable,
        TestDimensionalConstraints,
    ]
    if just_tests:
        return test_cases
    suite = unittest.TestSuite()
    loader = unittest.TestLoader()
    for test_case in test_cases:
        suite.addTests(loader.loadTestsFromTestCase(test_case))
    runner = unittest.TextTestRunner()
    return runner.run(suite)