MilesCranmer committed
Merge pull request #649 from MilesCranmer/var-complexity
Files changed:
- .github/workflows/CI.yml +1 -1
- pyproject.toml +2 -1
- pysr/juliapkg.json +1 -1
- pysr/sr.py +100 -25
- pysr/test/params.py +1 -1
- pysr/test/test.py +83 -12
- pysr/test/test_jax.py +5 -2
- pysr/test/test_startup.py +3 -2
- pysr/test/test_torch.py +1 -1
- pysr/utils.py +12 -0
.github/workflows/CI.yml
CHANGED
@@ -90,7 +90,7 @@ jobs:
       - name: "Coveralls"
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          COVERALLS_FLAG_NAME: test-${{ matrix.julia-version }}-${{ matrix.python-version }}
+          COVERALLS_FLAG_NAME: test-${{ matrix.julia-version }}-${{ matrix.python-version }}-${{ matrix.test-id }}
           COVERALLS_PARALLEL: true
         run: coveralls --service=github
 
pyproject.toml
CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "pysr"
-version = "0.18.4"
+version = "0.18.5"
 authors = [
     {name = "Miles Cranmer", email = "[email protected]"},
 ]
@@ -41,4 +41,5 @@ dev-dependencies = [
     "pandas-stubs>=2.2.1.240316",
     "types-pytz>=2024.1.0.20240417",
     "types-openpyxl>=3.1.0.20240428",
+    "coverage>=7.5.3",
 ]
pysr/juliapkg.json
CHANGED
@@ -3,7 +3,7 @@
   "packages": {
     "SymbolicRegression": {
       "uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
-      "version": "=0.24.4"
+      "version": "=0.24.5"
     },
     "Serialization": {
       "uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",
pysr/sr.py
CHANGED
@@ -1,8 +1,6 @@
 """Define the PySRRegressor scikit-learn interface."""
 
 import copy
-import difflib
-import inspect
 import os
 import pickle as pkl
 import re
@@ -57,6 +55,7 @@ from .utils import (
     _preprocess_julia_floats,
     _safe_check_feature_names_in,
     _subscriptify,
+    _suggest_keywords,
 )
 
 ALREADY_RAN = False
@@ -122,7 +121,7 @@ def _maybe_create_inline_operators(
                     "and underscores are allowed."
                 )
             if (extra_sympy_mappings is None) or (
-                not function_name in extra_sympy_mappings
+                function_name not in extra_sympy_mappings
             ):
                 raise ValueError(
                     f"Custom function {function_name} is not defined in `extra_sympy_mappings`. "
@@ -139,6 +138,7 @@ def _check_assertions(
     X,
     use_custom_variable_names,
     variable_names,
+    complexity_of_variables,
     weights,
     y,
     X_units,
@@ -163,6 +163,13 @@ def _check_assertions(
                     "and underscores are allowed."
                 )
             assert_valid_sympy_symbol(var_name)
+    if (
+        isinstance(complexity_of_variables, list)
+        and len(complexity_of_variables) != X.shape[1]
+    ):
+        raise ValueError(
+            "The number of elements in `complexity_of_variables` must equal the number of features in `X`."
+        )
     if X_units is not None and len(X_units) != X.shape[1]:
         raise ValueError(
             "The number of units in `X_units` must equal the number of features in `X`."
@@ -333,7 +340,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         `idx` argument to the function, which is `nothing`
         for non-batched, and a 1D array of indices for batched.
         Default is `None`.
-    complexity_of_operators : dict[str, float]
+    complexity_of_operators : dict[str, Union[int, float]]
         If you would like to use a complexity other than 1 for an
         operator, specify the complexity here. For example,
         `{"sin": 2, "+": 1}` would give a complexity of 2 for each use
@@ -342,10 +349,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         numbers for a complexity, and the total complexity of a tree
         will be rounded to the nearest integer after computing.
         Default is `None`.
-    complexity_of_constants : float
+    complexity_of_constants : int | float
         Complexity of constants. Default is `1`.
-    complexity_of_variables : float
-        Complexity of variables. Default is `1`.
+    complexity_of_variables : int | float
+        Global complexity of variables. To set different complexities for
+        different variables, pass a list of complexities to the `fit` method
+        with keyword `complexity_of_variables`. You cannot use both.
+        Default is `1`.
     parsimony : float
         Multiplicative factor for how much to punish complexity.
         Default is `0.0032`.
@@ -691,6 +701,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     n_features_in_: int
     feature_names_in_: ArrayLike[str]
     display_feature_names_in_: ArrayLike[str]
+    complexity_of_variables_: Union[int, float, List[Union[int, float]], None]
     X_units_: Union[ArrayLike[str], None]
     y_units_: Union[str, ArrayLike[str], None]
     nout_: int
@@ -722,7 +733,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         loss_function: Optional[str] = None,
         complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
         complexity_of_constants: Union[int, float] = 1,
-        complexity_of_variables: Union[int, float] = 1,
+        complexity_of_variables: Optional[Union[int, float]] = None,
         parsimony: float = 0.0032,
         dimensional_constraint_penalty: Optional[float] = None,
         dimensionless_constants_only: bool = False,
@@ -1344,13 +1355,22 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         return param_container
 
     def _validate_and_set_fit_params(
-        self, X, y, Xresampled, weights, variable_names, X_units, y_units
+        self,
+        X,
+        y,
+        Xresampled,
+        weights,
+        variable_names,
+        complexity_of_variables,
+        X_units,
+        y_units,
     ) -> Tuple[
         ndarray,
         ndarray,
         Optional[ndarray],
         Optional[ndarray],
         ArrayLike[str],
+        Union[int, float, List[Union[int, float]]],
         Optional[ArrayLike[str]],
         Optional[Union[str, ArrayLike[str]]],
     ]:
@@ -1375,6 +1395,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             for that particular element of y.
         variable_names : ndarray of length n_features
             Names of each variable in the training dataset, `X`.
+        complexity_of_variables : int | float | list[int | float]
+            Complexity of each variable in the training dataset, `X`.
         X_units : list[str] of length n_features
             Units of each variable in the training dataset, `X`.
         y_units : str | list[str] of length n_out
@@ -1422,6 +1444,22 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                     "Please use valid names instead."
                 )
 
+        if (
+            complexity_of_variables is not None
+            and self.complexity_of_variables is not None
+        ):
+            raise ValueError(
+                "You cannot set `complexity_of_variables` at both `fit` and `__init__`. "
+                "Pass it at `__init__` to set it to global default, OR use `fit` to set it for "
+                "each variable individually."
+            )
+        elif complexity_of_variables is not None:
+            complexity_of_variables = complexity_of_variables
+        elif self.complexity_of_variables is not None:
+            complexity_of_variables = self.complexity_of_variables
+        else:
+            complexity_of_variables = 1
+
         # Data validation and feature name fetching via sklearn
         # This method sets the n_features_in_ attribute
         if Xresampled is not None:
@@ -1452,10 +1490,20 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         else:
             raise NotImplementedError("y shape not supported!")
 
+        self.complexity_of_variables_ = copy.deepcopy(complexity_of_variables)
         self.X_units_ = copy.deepcopy(X_units)
         self.y_units_ = copy.deepcopy(y_units)
 
-        return X, y, Xresampled, weights, variable_names, X_units, y_units
+        return (
+            X,
+            y,
+            Xresampled,
+            weights,
+            variable_names,
+            complexity_of_variables,
+            X_units,
+            y_units,
+        )
 
     def _validate_data_X_y(self, X, y) -> Tuple[ndarray, ndarray]:
         raw_out = self._validate_data(X=X, y=y, reset=True, multi_output=True)  # type: ignore
@@ -1471,6 +1519,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         y: ndarray,
         Xresampled: Union[ndarray, None],
         variable_names: ArrayLike[str],
+        complexity_of_variables: Union[int, float, List[Union[int, float]]],
         X_units: Union[ArrayLike[str], None],
         y_units: Union[ArrayLike[str], str, None],
         random_state: np.random.RandomState,
@@ -1493,6 +1542,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         variable_names : list[str]
             Names of each variable in the training dataset, `X`.
             Of length `n_features`.
+        complexity_of_variables : int | float | list[int | float]
+            Complexity of each variable in the training dataset, `X`.
         X_units : list[str]
             Units of each variable in the training dataset, `X`.
         y_units : str | list[str]
@@ -1543,6 +1594,14 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             ],
         )
 
+        if isinstance(complexity_of_variables, list):
+            complexity_of_variables = [
+                complexity_of_variables[i]
+                for i in range(len(complexity_of_variables))
+                if selection_mask[i]
+            ]
+            self.complexity_of_variables_ = copy.deepcopy(complexity_of_variables)
+
         if X_units is not None:
             X_units = cast(
                 ArrayLike[str],
@@ -1567,7 +1626,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         else:
             X, y = denoise(X, y, Xresampled=Xresampled, random_state=random_state)
 
-        return X, y, variable_names, X_units, y_units
+        return X, y, variable_names, complexity_of_variables, X_units, y_units
 
     def _run(
         self,
@@ -1624,6 +1683,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
 
         nested_constraints = self.nested_constraints
         complexity_of_operators = self.complexity_of_operators
+        complexity_of_variables = self.complexity_of_variables_
         cluster_manager = self.cluster_manager
 
         # Start julia backend processes
@@ -1668,6 +1728,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             complexity_of_operators = jl.seval(complexity_of_operators_str)
         # TODO: Refactor this into helper function
 
+        if isinstance(complexity_of_variables, list):
+            complexity_of_variables = jl_array(complexity_of_variables)
+
         custom_loss = jl.seval(
             str(self.elementwise_loss)
             if self.elementwise_loss is not None
@@ -1726,7 +1789,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             una_constraints=jl_array(una_constraints),
             complexity_of_operators=complexity_of_operators,
            complexity_of_constants=self.complexity_of_constants,
-            complexity_of_variables=self.complexity_of_variables,
+            complexity_of_variables=complexity_of_variables,
            nested_constraints=nested_constraints,
            elementwise_loss=custom_loss,
            loss_function=custom_full_objective,
@@ -1871,6 +1934,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Xresampled=None,
         weights=None,
         variable_names: Optional[ArrayLike[str]] = None,
+        complexity_of_variables: Optional[
+            Union[int, float, List[Union[int, float]]]
+        ] = None,
         X_units: Optional[ArrayLike[str]] = None,
         y_units: Optional[Union[str, ArrayLike[str]]] = None,
     ) -> "PySRRegressor":
@@ -1931,6 +1997,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         self.selection_mask_ = None
         self.julia_state_stream_ = None
         self.julia_options_stream_ = None
+        self.complexity_of_variables_ = None
         self.X_units_ = None
         self.y_units_ = None
 
@@ -1944,10 +2011,18 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             Xresampled,
             weights,
             variable_names,
+            complexity_of_variables,
             X_units,
             y_units,
         ) = self._validate_and_set_fit_params(
-            X, y, Xresampled, weights, variable_names, X_units, y_units
+            X,
+            y,
+            Xresampled,
+            weights,
+            variable_names,
+            complexity_of_variables,
+            X_units,
+            y_units,
         )
 
         if X.shape[0] > 10000 and not self.batching:
@@ -1965,8 +2040,17 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         seed = cast(int, random_state.randint(0, 2**31 - 1))  # For julia random
 
         # Pre transformations (feature selection and denoising)
-        X, y, variable_names, X_units, y_units = self._pre_transform_training_data(
-            X, y, Xresampled, variable_names, X_units, y_units, random_state
+        X, y, variable_names, complexity_of_variables, X_units, y_units = (
+            self._pre_transform_training_data(
+                X,
+                y,
+                Xresampled,
+                variable_names,
+                complexity_of_variables,
+                X_units,
+                y_units,
+                random_state,
+            )
         )
 
         # Warn about large feature counts (still warn if feature count is large
@@ -1993,6 +2077,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             X,
             use_custom_variable_names,
             variable_names,
+            complexity_of_variables,
             weights,
             y,
             X_units,
@@ -2465,16 +2550,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         return with_preamble(table_string)
 
 
-def _suggest_keywords(cls, k: str) -> List[str]:
-    valid_keywords = [
-        param
-        for param in inspect.signature(cls.__init__).parameters
-        if param not in ["self", "kwargs"]
-    ]
-    suggestions = difflib.get_close_matches(k, valid_keywords, n=3)
-    return suggestions
-
-
 def idx_model_selection(equations: pd.DataFrame, model_selection: str):
     """Select an expression and return its index."""
     if model_selection == "accuracy":
pysr/test/params.py
CHANGED
@@ -1,6 +1,6 @@
 import inspect
 
-from .. import PySRRegressor
+from pysr import PySRRegressor
 
 DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
 DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default
pysr/test/test.py
CHANGED
@@ -11,17 +11,18 @@ import pandas as pd
 import sympy
 from sklearn.utils.estimator_checks import check_estimator
 
-from .. import PySRRegressor, install, jl
-from ..export_latex import sympy2latex
-from ..feature_selection import _handle_feature_selection, run_feature_selection
-from ..julia_helpers import init_julia
-from ..sr import (
+from pysr import PySRRegressor, install, jl
+from pysr.export_latex import sympy2latex
+from pysr.feature_selection import _handle_feature_selection, run_feature_selection
+from pysr.julia_helpers import init_julia
+from pysr.sr import (
     _check_assertions,
     _process_constraints,
     _suggest_keywords,
     idx_model_selection,
 )
-from ..utils import _csv_filename_to_pkl_filename
+from pysr.utils import _csv_filename_to_pkl_filename
+
 from .params import (
     DEFAULT_NCYCLES,
     DEFAULT_NITERATIONS,
@@ -29,6 +30,11 @@ from .params import (
     DEFAULT_POPULATIONS,
 )
 
+# Disables local saving:
+os.environ["SYMBOLIC_REGRESSION_IS_TESTING"] = os.environ.get(
+    "SYMBOLIC_REGRESSION_IS_TESTING", "true"
+)
+
 
 class TestPipeline(unittest.TestCase):
     def setUp(self):
@@ -176,6 +182,63 @@ class TestPipeline(unittest.TestCase):
         self.assertLessEqual(mse1, 1e-4)
         self.assertLessEqual(mse2, 1e-4)
 
+    def test_custom_variable_complexity(self):
+        for outer in (True, False):
+            for case in (1, 2):
+                y = self.X[:, [0, 1]]
+                if case == 1:
+                    kwargs = dict(complexity_of_variables=[2, 3])
+                elif case == 2:
+                    kwargs = dict(complexity_of_variables=2)
+
+                if outer:
+                    outer_kwargs = kwargs
+                    inner_kwargs = dict()
+                else:
+                    outer_kwargs = dict()
+                    inner_kwargs = kwargs
+
+                model = PySRRegressor(
+                    binary_operators=["+"],
+                    verbosity=0,
+                    **self.default_test_kwargs,
+                    early_stop_condition=(
+                        f"stop_if_{case}(l, c) = l < 1e-8 && c <= {3 if case == 1 else 2}"
+                    ),
+                    **outer_kwargs,
+                )
+                model.fit(self.X[:, [0, 1]], y, **inner_kwargs)
+                self.assertLessEqual(model.get_best()[0]["loss"], 1e-8)
+                self.assertLessEqual(model.get_best()[1]["loss"], 1e-8)
+
+                self.assertEqual(model.get_best()[0]["complexity"], 2)
+                self.assertEqual(
+                    model.get_best()[1]["complexity"], 3 if case == 1 else 2
+                )
+
+    def test_error_message_custom_variable_complexity(self):
+        X = np.ones((10, 2))
+        y = np.ones((10,))
+        model = PySRRegressor()
+        with self.assertRaises(ValueError) as cm:
+            model.fit(X, y, complexity_of_variables=[1, 2, 3])
+
+        self.assertIn(
+            "number of elements in `complexity_of_variables`", str(cm.exception)
+        )
+
+    def test_error_message_both_variable_complexity(self):
+        X = np.ones((10, 2))
+        y = np.ones((10,))
+        model = PySRRegressor(complexity_of_variables=[1, 2])
+        with self.assertRaises(ValueError) as cm:
+            model.fit(X, y, complexity_of_variables=[1, 2, 3])
+
+        self.assertIn(
+            "You cannot set `complexity_of_variables` at both `fit` and `__init__`.",
+            str(cm.exception),
+        )
+
     def test_multioutput_weighted_with_callable_temp_equation(self):
         X = self.X.copy()
         y = X[:, [0, 1]] ** 2
@@ -313,7 +376,10 @@ class TestPipeline(unittest.TestCase):
                 "unused_feature": self.rstate.randn(500),
             }
         )
-        true_fn = lambda x: np.array(x["T"] + x["x"] ** 2 + 1.323837)
+
+        def true_fn(x):
+            return np.array(x["T"] + x["x"] ** 2 + 1.323837)
+
         y = true_fn(X)
         noise = self.rstate.randn(500) * 0.01
         y = y + noise
@@ -372,13 +438,12 @@ class TestPipeline(unittest.TestCase):
 
     def test_load_model(self):
         """See if we can load a ran model from the equation file."""
-        csv_file_data = """
-        Complexity,Loss,Equation
+        csv_file_data = """Complexity,Loss,Equation
         1,0.19951081,"1.9762075"
         3,0.12717344,"(f0 + 1.4724599)"
         4,0.104823045,"pow_abs(2.2683423, cos(f3))\""""
         # Strip the indents:
-        csv_file_data = "\n".join([l.strip() for l in csv_file_data.split("\n")])
+        csv_file_data = "\n".join([line.strip() for line in csv_file_data.split("\n")])
 
         for from_backup in [False, True]:
             rand_dir = Path(tempfile.mkdtemp())
@@ -430,7 +495,7 @@ class TestPipeline(unittest.TestCase):
             if os.path.exists(file_to_delete):
                 os.remove(file_to_delete)
 
-            pickle_file = rand_dir / "equations.pkl"
+            # pickle_file = rand_dir / "equations.pkl"
             model3 = PySRRegressor.from_file(
                 model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
             )
@@ -1081,8 +1146,14 @@ class TestDimensionalConstraints(unittest.TestCase):
         """This just checks the number of units passed"""
         use_custom_variable_names = False
        variable_names = None
+        complexity_of_variables = 1
        weights = None
-        args = (use_custom_variable_names, variable_names, weights)
+        args = (
+            use_custom_variable_names,
+            variable_names,
+            complexity_of_variables,
+            weights,
+        )
         valid_units = [
             (np.ones((10, 2)), np.ones(10), ["m/s", "s"], "m"),
             (np.ones((10, 1)), np.ones(10), ["m/s"], None),
pysr/test/test_jax.py
CHANGED
@@ -5,7 +5,7 @@ import numpy as np
 import pandas as pd
 import sympy
 
-from .. import PySRRegressor, sympy2jax
+from pysr import PySRRegressor, sympy2jax
 
 
 class TestJAX(unittest.TestCase):
@@ -89,7 +89,10 @@ class TestJAX(unittest.TestCase):
     def test_feature_selection_custom_operators(self):
         rstate = np.random.RandomState(0)
         X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
-        cos_approx = lambda x: 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
+
+        def cos_approx(x):
+            return 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
+
         y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
 
         model = PySRRegressor(
pysr/test/test_startup.py
CHANGED
@@ -9,8 +9,9 @@ from pathlib import Path
 
 import numpy as np
 
-from .. import PySRRegressor
-from ..julia_import import jl_version
+from pysr import PySRRegressor
+from pysr.julia_import import jl_version
+
 from .params import DEFAULT_NITERATIONS, DEFAULT_POPULATIONS
 
 
pysr/test/test_torch.py
CHANGED
@@ -4,7 +4,7 @@ import numpy as np
 import pandas as pd
 import sympy
 
-from .. import PySRRegressor, sympy2torch
+from pysr import PySRRegressor, sympy2torch
 
 
 class TestTorch(unittest.TestCase):
pysr/utils.py
CHANGED
@@ -1,3 +1,5 @@
+import difflib
+import inspect
 import os
 import re
 from pathlib import Path
@@ -61,3 +63,13 @@ def _subscriptify(i: int) -> str:
     For example, 123 -> "₁₂₃".
     """
     return "".join([chr(0x2080 + int(c)) for c in str(i)])
+
+
+def _suggest_keywords(cls, k: str) -> List[str]:
+    valid_keywords = [
+        param
+        for param in inspect.signature(cls.__init__).parameters
+        if param not in ["self", "kwargs"]
+    ]
+    suggestions = difflib.get_close_matches(k, valid_keywords, n=3)
+    return suggestions