Spaces:

MilesCranmer
/

PySR

Sleeping

App Files Files Community

MilesCranmer committed on Jul 8, 2024

Commit

11be150

unverified ·

2 Parent(s): 0f7799e db44938

Merge branch 'master' into gui

Browse files

Files changed (41) hide show

.github/workflows/CI.yml +17 -23
.github/workflows/CI_Windows.yml +8 -14
.github/workflows/CI_docker.yml +6 -12
.github/workflows/CI_docker_large_nightly.yml +2 -2
.github/workflows/CI_large_nightly.yml +3 -3
.github/workflows/CI_mac.yml +7 -13
.github/workflows/docker_deploy.yml +6 -6
.github/workflows/update_backend.yml +0 -1
.gitignore +2 -0
.pre-commit-config.yaml +3 -3
Dockerfile +2 -2
README.md +1 -1
benchmarks/hyperparamopt.py +1 -0
benchmarks/print_best_model.py +1 -0
docs/examples.md +3 -1
docs/generate_papers.py +1 -0
environment.yml +1 -2
examples/pysr_demo.ipynb +1 -1
pyproject.toml +16 -2
pysr/denoising.py +18 -4
pysr/deprecated.py +1 -0
pysr/export_jax.py +4 -1
pysr/export_latex.py +13 -0
pysr/export_numpy.py +11 -2
pysr/export_sympy.py +15 -6
pysr/export_torch.py +9 -6
pysr/feature_selection.py +20 -3
pysr/julia_helpers.py +17 -5
pysr/julia_import.py +9 -21
pysr/juliapkg.json +1 -1
pysr/param_groupings.yml +1 -0
pysr/sklearn_monkeypatch.py +1 -2
pysr/sr.py +381 -193
pysr/test/__main__.py +1 -0
pysr/test/params.py +1 -1
pysr/test/test.py +220 -106
pysr/test/test_jax.py +41 -10
pysr/test/test_startup.py +3 -6
pysr/test/test_torch.py +36 -2
pysr/utils.py +22 -2
requirements.txt +2 -3

.github/workflows/CI.yml CHANGED Viewed

@@ -5,20 +5,14 @@ on:
     branches:
       - '**'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI.yml'
-      - 'setup.py'
     tags:
       - 'v*.*.*'
   pull_request:
     branches:
-      - '*'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI.yml'
-      - 'setup.py'
 jobs:
   test:
@@ -32,12 +26,12 @@ jobs:
     strategy:
       matrix:
         julia-version: ['1']
-        python-version: ['3.11']
         os: [ubuntu-latest]
         test-id: [main]
         include:
           - julia-version: '1.6'
-            python-version: '3.7'
             os: ubuntu-latest
             test-id: include
           - julia-version: '1'
@@ -48,11 +42,11 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: "Set up Julia"
-        uses: julia-actions/setup-julia@v1
         with:
           version: ${{ matrix.julia-version }}
       - name: "Cache Julia"
-        uses: julia-actions/cache@v1
         with:
           cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
           cache-packages: false
@@ -90,7 +84,7 @@ jobs:
       - name: "Coveralls"
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          COVERALLS_FLAG_NAME: test-${{ matrix.julia-version }}-${{ matrix.python-version }}
           COVERALLS_PARALLEL: true
         run: coveralls --service=github
@@ -99,11 +93,11 @@ jobs:
     strategy:
       matrix:
         os: ['ubuntu-latest']
-        python-version: ['3.11']
         julia-version: ['1']
         include:
           - os: ubuntu-latest
-            python-version: '3.7'
             julia-version: '1.6'
     steps:
       - uses: actions/checkout@v4
@@ -122,7 +116,7 @@ jobs:
         shell: bash -l {0}
     strategy:
       matrix:
-        python-version: ['3.11']
         os: ['ubuntu-latest']
     steps:
@@ -144,7 +138,7 @@ jobs:
           activate-environment: pysr-test
           environment-file: environment.yml
       - name: "Cache Julia"
-        uses: julia-actions/cache@v1
         with:
           cache-name: ${{ matrix.os }}-conda-${{ matrix.python-version }}
           cache-packages: false
@@ -181,8 +175,8 @@ jobs:
     strategy:
       matrix:
         python-version:
-          - '3.11'
-          - '3.7'
         os: ['ubuntu-latest']
     steps:
@@ -199,10 +193,10 @@ jobs:
             pip install mypy
       - name: "Install additional dependencies"
         run: python -m pip install jax jaxlib torch
-        if: ${{ matrix.python-version != '3.7' }}
       - name: "Run mypy"
         run: python -m mypy --install-types --non-interactive pysr
-        if: ${{ matrix.python-version != '3.7' }}
       - name: "Run compatible mypy"
         run: python -m mypy --ignore-missing-imports pysr
-        if: ${{ matrix.python-version == '3.7' }}

     branches:
       - '**'
     paths:
+      - '**'
     tags:
       - 'v*.*.*'
   pull_request:
     branches:
+      - 'master'
     paths:
+      - '**'
 jobs:
   test:
     strategy:
       matrix:
         julia-version: ['1']
+        python-version: ['3.12']
         os: [ubuntu-latest]
         test-id: [main]
         include:
           - julia-version: '1.6'
+            python-version: '3.8'
             os: ubuntu-latest
             test-id: include
           - julia-version: '1'
     steps:
       - uses: actions/checkout@v4
       - name: "Set up Julia"
+        uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.julia-version }}
       - name: "Cache Julia"
+        uses: julia-actions/cache@v2
         with:
           cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
           cache-packages: false
       - name: "Coveralls"
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COVERALLS_FLAG_NAME: test-${{ matrix.julia-version }}-${{ matrix.python-version }}-${{ matrix.test-id }}
           COVERALLS_PARALLEL: true
         run: coveralls --service=github
     strategy:
       matrix:
         os: ['ubuntu-latest']
+        python-version: ['3.12']
         julia-version: ['1']
         include:
           - os: ubuntu-latest
+            python-version: '3.8'
             julia-version: '1.6'
     steps:
       - uses: actions/checkout@v4
         shell: bash -l {0}
     strategy:
       matrix:
+        python-version: ['3.12']
         os: ['ubuntu-latest']
     steps:
           activate-environment: pysr-test
           environment-file: environment.yml
       - name: "Cache Julia"
+        uses: julia-actions/cache@v2
         with:
           cache-name: ${{ matrix.os }}-conda-${{ matrix.python-version }}
           cache-packages: false
     strategy:
       matrix:
         python-version:
+          - '3.12'
+          - '3.8'
         os: ['ubuntu-latest']
     steps:
             pip install mypy
       - name: "Install additional dependencies"
         run: python -m pip install jax jaxlib torch
+        if: ${{ matrix.python-version != '3.8' }}
       - name: "Run mypy"
         run: python -m mypy --install-types --non-interactive pysr
+        if: ${{ matrix.python-version != '3.8' }}
       - name: "Run compatible mypy"
         run: python -m mypy --ignore-missing-imports pysr
+        if: ${{ matrix.python-version == '3.8' }}

.github/workflows/CI_Windows.yml CHANGED Viewed

@@ -3,22 +3,16 @@ name: Windows
 on:
   push:
     branches:
-      - '**'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI_Windows.yml'
-      - 'setup.py'
     tags:
       - 'v*.*.*'
   pull_request:
     branches:
-      - '*'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI_Windows.yml'
-      - 'setup.py'
 jobs:
   test:
@@ -30,17 +24,17 @@ jobs:
     strategy:
       matrix:
         julia-version: ['1']
-        python-version: ['3.11']
         os: [windows-latest]
     steps:
       - uses: actions/checkout@v4
       - name: "Set up Julia"
-        uses: julia-actions/setup-julia@v1
         with:
           version: ${{ matrix.julia-version }}
       - name: "Cache Julia"
-        uses: julia-actions/cache@v1
         with:
           cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
           cache-packages: false
@@ -52,7 +46,7 @@ jobs:
       - name: "Install PySR"
         run: |
             python -m pip install --upgrade pip
-            pip install pytest nbval
             pip install .
             python -c 'import pysr'
       - name: "Run tests"

 on:
   push:
     branches:
+      - 'master'
     paths:
+      - '**'
     tags:
       - 'v*.*.*'
   pull_request:
     branches:
+      - 'master'
     paths:
+      - '**'
 jobs:
   test:
     strategy:
       matrix:
         julia-version: ['1']
+        python-version: ['3.12']
         os: [windows-latest]
     steps:
       - uses: actions/checkout@v4
       - name: "Set up Julia"
+        uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.julia-version }}
       - name: "Cache Julia"
+        uses: julia-actions/cache@v2
         with:
           cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
           cache-packages: false
       - name: "Install PySR"
         run: |
             python -m pip install --upgrade pip
+            pip install pytest nbval "numpy<2.0.0"
             pip install .
             python -c 'import pysr'
       - name: "Run tests"

.github/workflows/CI_docker.yml CHANGED Viewed

@@ -3,22 +3,16 @@ name: Docker
 on:
   push:
     branches:
-      - '**'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI_docker.yml'
-      - 'setup.py'
-      - 'Dockerfile'
   pull_request:
     branches:
-      - '*'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI_docker.yml'
-      - 'setup.py'
-      - 'Dockerfile'
 jobs:
   test:

 on:
   push:
     branches:
+      - 'master'
     paths:
+      - '**'
+    tags:
+      - 'v*.*.*'
   pull_request:
     branches:
+      - 'master'
     paths:
+      - '**'
 jobs:
   test:

.github/workflows/CI_docker_large_nightly.yml CHANGED Viewed

@@ -19,7 +19,7 @@ jobs:
       fail-fast: false
       matrix:
         julia-version: ['1.6', '1']
-        python-version: ['3.7', '3.11']
         os: [ubuntu-latest]
         arch: ['linux/amd64', 'linux/arm64']
@@ -27,7 +27,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
         with:
           platforms: all
       - name: Build docker

       fail-fast: false
       matrix:
         julia-version: ['1.6', '1']
+        python-version: ['3.8', '3.12']
         os: [ubuntu-latest]
         arch: ['linux/amd64', 'linux/arm64']
     steps:
       - uses: actions/checkout@v4
       - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
         with:
           platforms: all
       - name: Build docker

.github/workflows/CI_large_nightly.yml CHANGED Viewed

@@ -23,14 +23,14 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        julia-version: ['1.6', '1.8', '1.9']
-        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
         os: [ubuntu-latest, macos-latest, windows-latest]
     steps:
       - uses: actions/checkout@v4
       - name: "Set up Julia"
-        uses: julia-actions/setup-julia@v1
         with:
           version: ${{ matrix.julia-version }}
       - name: "Set up Python"

     strategy:
       fail-fast: false
       matrix:
+        julia-version: ['1.6', '1.8', '1.10']
+        python-version: ['3.8', '3.10', '3.12']
         os: [ubuntu-latest, macos-latest, windows-latest]
     steps:
       - uses: actions/checkout@v4
       - name: "Set up Julia"
+        uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.julia-version }}
       - name: "Set up Python"

.github/workflows/CI_mac.yml CHANGED Viewed

@@ -3,22 +3,16 @@ name: macOS
 on:
   push:
     branches:
-      - '**'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI_mac.yml'
-      - 'setup.py'
     tags:
       - 'v*.*.*'
   pull_request:
     branches:
-      - '*'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI_mac.yml'
-      - 'setup.py'
 jobs:
   test:
@@ -30,17 +24,17 @@ jobs:
     strategy:
       matrix:
         julia-version: ['1']
-        python-version: ['3.11']
         os: [macos-latest]
     steps:
       - uses: actions/checkout@v4
       - name: "Set up Julia"
-        uses: julia-actions/setup-julia@v1
         with:
           version: ${{ matrix.julia-version }}
       - name: "Cache Julia"
-        uses: julia-actions/cache@v1
         with:
           cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
           cache-packages: false

 on:
   push:
     branches:
+      - 'master'
     paths:
+      - '**'
     tags:
       - 'v*.*.*'
   pull_request:
     branches:
+      - 'master'
     paths:
+      - '**'
 jobs:
   test:
     strategy:
       matrix:
         julia-version: ['1']
+        python-version: ['3.12']
         os: [macos-latest]
     steps:
       - uses: actions/checkout@v4
       - name: "Set up Julia"
+        uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.julia-version }}
       - name: "Cache Julia"
+        uses: julia-actions/cache@v2
         with:
           cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
           cache-packages: false

.github/workflows/docker_deploy.yml CHANGED Viewed

@@ -18,19 +18,19 @@ jobs:
       matrix:
         os: [ubuntu-latest]
         arch: [linux/amd64]
-        python-version: [3.11.6]
-        julia-version: [1.9.4]
     steps:
       - name: Checkout
         uses: actions/checkout@v4
       - name: Login to Docker Hub
-        uses: docker/login-action@v2
         if: github.event_name != 'pull_request'
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
       - name: Login to GitHub registry
-        uses: docker/login-action@v2
         if: github.event_name != 'pull_request'
         with:
           registry: ghcr.io
@@ -55,11 +55,11 @@ jobs:
             type=sha
             type=raw,value=latest,enable={{is_default_branch}}
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
       - name: Build and push
-        uses: docker/build-push-action@v5
         with:
           context: .
           platforms: ${{ matrix.arch }}

       matrix:
         os: [ubuntu-latest]
         arch: [linux/amd64]
+        python-version: [3.12.3]
+        julia-version: [1.10.3]
     steps:
       - name: Checkout
         uses: actions/checkout@v4
       - name: Login to Docker Hub
+        uses: docker/login-action@v3
         if: github.event_name != 'pull_request'
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
       - name: Login to GitHub registry
+        uses: docker/login-action@v3
         if: github.event_name != 'pull_request'
         with:
           registry: ghcr.io
             type=sha
             type=raw,value=latest,enable={{is_default_branch}}
       - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
       - name: Build and push
+        uses: docker/build-push-action@v6
         with:
           context: .
           platforms: ${{ matrix.arch }}

.github/workflows/update_backend.yml CHANGED Viewed

@@ -40,7 +40,6 @@ jobs:
       - name: "Create PR if necessary"
         uses: peter-evans/create-pull-request@v6
         with:
-          token: ${{ secrets.REPO_SCOPED_TOKEN }}
           title: "Automated update to backend: v${{ steps.get-latest.outputs.version }}"
           body: |
             This PR was automatically generated by the GitHub Action `.github/workflows/update-backend.yml`

       - name: "Create PR if necessary"
         uses: peter-evans/create-pull-request@v6
         with:
           title: "Automated update to backend: v${{ steps.get-latest.outputs.version }}"
           body: |
             This PR was automatically generated by the GitHub Action `.github/workflows/update-backend.yml`

.gitignore CHANGED Viewed

@@ -23,3 +23,5 @@ site
 **/*.code-workspace
 **/*.tar.gz
 venv

 **/*.code-workspace
 **/*.tar.gz
 venv
+requirements-dev.lock
+requirements.lock

.pre-commit-config.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 repos:
   # General linting
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
@@ -9,14 +9,14 @@ repos:
       - id: check-added-large-files
   # General formatting
   - repo: https://github.com/psf/black
-    rev: 23.12.1
     hooks:
       - id: black
       - id: black-jupyter
         exclude: pysr/test/test_nb.ipynb
   # Stripping notebooks
   - repo: https://github.com/kynan/nbstripout
-    rev: 0.6.1
     hooks:
       - id: nbstripout
         exclude: pysr/test/test_nb.ipynb

 repos:
   # General linting
   - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
       - id: check-added-large-files
   # General formatting
   - repo: https://github.com/psf/black
+    rev: 24.4.2
     hooks:
       - id: black
       - id: black-jupyter
         exclude: pysr/test/test_nb.ipynb
   # Stripping notebooks
   - repo: https://github.com/kynan/nbstripout
+    rev: 0.7.1
     hooks:
       - id: nbstripout
         exclude: pysr/test/test_nb.ipynb

Dockerfile CHANGED Viewed

@@ -1,8 +1,8 @@
 # This builds a dockerfile containing a working copy of PySR
 # with all pre-requisites installed.
-ARG JLVERSION=1.10.0
-ARG PYVERSION=3.11.6
 ARG BASE_IMAGE=bullseye
 FROM julia:${JLVERSION}-${BASE_IMAGE} AS jl

 # This builds a dockerfile containing a working copy of PySR
 # with all pre-requisites installed.
+ARG JLVERSION=1.10.4
+ARG PYVERSION=3.12.2
 ARG BASE_IMAGE=bullseye
 FROM julia:${JLVERSION}-${BASE_IMAGE} AS jl

README.md CHANGED Viewed

@@ -297,7 +297,7 @@ model = PySRRegressor(
     # ^ Higher precision calculations.
     warm_start=True,
     # ^ Start from where left off.
-    bumper=True,
     # ^ Faster evaluation (experimental)
     extra_sympy_mappings={"cos2": lambda x: sympy.cos(x)**2},
     # extra_torch_mappings={sympy.cos: torch.cos},

     # ^ Higher precision calculations.
     warm_start=True,
     # ^ Start from where left off.
+    turbo=True,
     # ^ Faster evaluation (experimental)
     extra_sympy_mappings={"cos2": lambda x: sympy.cos(x)**2},
     # extra_torch_mappings={sympy.cos: torch.cos},

benchmarks/hyperparamopt.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Start a hyperoptimization from a single node"""
 import pickle as pkl
 import sys

 """Start a hyperoptimization from a single node"""
 import pickle as pkl
 import sys

benchmarks/print_best_model.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Print the best model parameters and loss"""
 import pickle as pkl
 from pprint import PrettyPrinter

 """Print the best model parameters and loss"""
 import pickle as pkl
 from pprint import PrettyPrinter

docs/examples.md CHANGED Viewed

@@ -428,7 +428,7 @@ the evaluation, as we simply evaluated each argument and divided the result) int
 `((2.3554819 + -0.3554746) - (x1 * (x0 * x0)))` and
 `(-1.0000019 - (x2 * x2))`, meaning that our discovered equation is
 equal to:
-$\frac{x_0^2 x_1 - 2.0000073}{x_2^2 - 1.0000019}$, which
 is nearly the same as the true equation!
 ## 10. Dimensional constraints
@@ -520,6 +520,8 @@ a constant `"2.6353e-22[m s⁻²]"`.
 Note that this expression has a large dynamic range so may be difficult to find. Consider searching with a larger `niterations` if needed.
 ## 11. Additional features

 `((2.3554819 + -0.3554746) - (x1 * (x0 * x0)))` and
 `(-1.0000019 - (x2 * x2))`, meaning that our discovered equation is
 equal to:
+$\frac{x_0^2 x_1 - 2.0000073}{x_2^2 + 1.0000019}$, which
 is nearly the same as the true equation!
 ## 10. Dimensional constraints
 Note that this expression has a large dynamic range so may be difficult to find. Consider searching with a larger `niterations` if needed.
+Note that you can also search for exclusively dimensionless constants by setting
+`dimensionless_constants_only` to `true`.
 ## 11. Additional features

docs/generate_papers.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """This script generates the papers.md file from the papers.yml file."""
 from pathlib import Path
 import yaml

 """This script generates the papers.md file from the papers.yml file."""
 from pathlib import Path
 import yaml

environment.yml CHANGED Viewed

@@ -2,11 +2,10 @@ name: test
 channels:
   - conda-forge
 dependencies:
-  - python>=3.7
   - sympy>=1.0.0,<2.0.0
   - pandas>=0.21.0,<3.0.0
   - numpy>=1.13.0,<2.0.0
   - scikit-learn>=1.0.0,<2.0.0
   - pyjuliacall>=0.9.15,<0.10.0
   - click>=7.0.0,<9.0.0
-  - typing_extensions>=4.0.0,<5.0.0

 channels:
   - conda-forge
 dependencies:
+  - python>=3.8
   - sympy>=1.0.0,<2.0.0
   - pandas>=0.21.0,<3.0.0
   - numpy>=1.13.0,<2.0.0
   - scikit-learn>=1.0.0,<2.0.0
   - pyjuliacall>=0.9.15,<0.10.0
   - click>=7.0.0,<9.0.0

examples/pysr_demo.ipynb CHANGED Viewed

@@ -396,7 +396,7 @@
     "id": "wbWHyOjl2_kX"
    },
    "source": [
-    "Since `quart` is arguably more complex than the other operators, you can also give it a different complexity, using, e.g., `complexity_of_operators={\"quart\": 2}` to give it a complexity of 2 (instead of the default 2). You can also define custom complexities for variables and constants (`complexity_of_variables` and `complexity_of_constants`, respectively - both take a single number).\n",
     "\n",
     "\n",
     "One can also add a binary operator, with, e.g., `\"myoperator(x, y) = x^2 * y\"`. All Julia operators that work on scalar 32-bit floating point values are available.\n",

     "id": "wbWHyOjl2_kX"
    },
    "source": [
+    "Since `quart` is arguably more complex than the other operators, you can also give it a different complexity, using, e.g., `complexity_of_operators={\"quart\": 2}` to give it a complexity of 2 (instead of the default 1). You can also define custom complexities for variables and constants (`complexity_of_variables` and `complexity_of_constants`, respectively - both take a single number).\n",
     "\n",
     "\n",
     "One can also add a binary operator, with, e.g., `\"myoperator(x, y) = x^2 * y\"`. All Julia operators that work on scalar 32-bit floating point values are available.\n",

pyproject.toml CHANGED Viewed

@@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "pysr"
-version = "0.18.1"
 authors = [
     {name = "Miles Cranmer", email = "[email protected]"},
 ]
 description = "Simple and efficient symbolic regression"
 readme = {file = "README.md", content-type = "text/markdown"}
 license = {file = "LICENSE"}
-requires-python = ">=3.7"
 classifiers = [
     "Programming Language :: Python :: 3",
     "Operating System :: OS Independent",
@@ -29,3 +29,17 @@ dependencies = {file = "requirements.txt"}
 [tool.isort]
 profile = "black"

 [project]
 name = "pysr"
+version = "0.19.0"
 authors = [
     {name = "Miles Cranmer", email = "[email protected]"},
 ]
 description = "Simple and efficient symbolic regression"
 readme = {file = "README.md", content-type = "text/markdown"}
 license = {file = "LICENSE"}
+requires-python = ">=3.8"
 classifiers = [
     "Programming Language :: Python :: 3",
     "Operating System :: OS Independent",
 [tool.isort]
 profile = "black"
+[tool.rye]
+dev-dependencies = [
+    "pre-commit>=3.7.0",
+    "ipython>=8.23.0",
+    "ipykernel>=6.29.4",
+    "mypy>=1.10.0",
+    "jax[cpu]>=0.4.26",
+    "torch>=2.3.0",
+    "pandas-stubs>=2.2.1.240316",
+    "types-pytz>=2024.1.0.20240417",
+    "types-openpyxl>=3.1.0.20240428",
+    "coverage>=7.5.3",
+]

pysr/denoising.py CHANGED Viewed

@@ -1,8 +1,17 @@
 """Functions for denoising data during preprocessing."""
 import numpy as np
-def denoise(X, y, Xresampled=None, random_state=None):
     """Denoise the dataset using a Gaussian process."""
     from sklearn.gaussian_process import GaussianProcessRegressor
     from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
@@ -14,12 +23,17 @@ def denoise(X, y, Xresampled=None, random_state=None):
     gpr.fit(X, y)
     if Xresampled is not None:
-        return Xresampled, gpr.predict(Xresampled)
-    return X, gpr.predict(X)
-def multi_denoise(X, y, Xresampled=None, random_state=None):
     """Perform `denoise` along each column of `y` independently."""
     y = np.stack(
         [

 """Functions for denoising data during preprocessing."""
+from typing import Optional, Tuple, cast
 import numpy as np
+from numpy import ndarray
+def denoise(
+    X: ndarray,
+    y: ndarray,
+    Xresampled: Optional[ndarray] = None,
+    random_state: Optional[np.random.RandomState] = None,
+) -> Tuple[ndarray, ndarray]:
     """Denoise the dataset using a Gaussian process."""
     from sklearn.gaussian_process import GaussianProcessRegressor
     from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
     gpr.fit(X, y)
     if Xresampled is not None:
+        return Xresampled, cast(ndarray, gpr.predict(Xresampled))
+    return X, cast(ndarray, gpr.predict(X))
+def multi_denoise(
+    X: ndarray,
+    y: ndarray,
+    Xresampled: Optional[ndarray] = None,
+    random_state: Optional[np.random.RandomState] = None,
+):
     """Perform `denoise` along each column of `y` independently."""
     y = np.stack(
         [

pysr/deprecated.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Various functions to deprecate features."""
 import warnings
 from .julia_import import jl

 """Various functions to deprecate features."""
 import warnings
 from .julia_import import jl

pysr/export_jax.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import sympy
 # Special since need to reduce arguments.
@@ -55,7 +56,9 @@ def sympy2jaxtext(expr, parameters, symbols_in, extra_jax_mappings=None):
     if issubclass(expr.func, sympy.Float):
         parameters.append(float(expr))
         return f"parameters[{len(parameters) - 1}]"
-    elif issubclass(expr.func, sympy.Rational):
         return f"{float(expr)}"
     elif issubclass(expr.func, sympy.Integer):
         return f"{int(expr)}"

+import numpy as np  # noqa: F401
 import sympy
 # Special since need to reduce arguments.
     if issubclass(expr.func, sympy.Float):
         parameters.append(float(expr))
         return f"parameters[{len(parameters) - 1}]"
+    elif issubclass(expr.func, sympy.Rational) or issubclass(
+        expr.func, sympy.NumberSymbol
+    ):
         return f"{float(expr)}"
     elif issubclass(expr.func, sympy.Integer):
         return f"{int(expr)}"

pysr/export_latex.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Functions to help export PySR equations to LaTeX."""
 from typing import List, Optional, Tuple
 import pandas as pd
@@ -152,3 +153,15 @@ def sympy2multilatextable(
     ]
     return "\n\n".join(latex_tables)

 """Functions to help export PySR equations to LaTeX."""
 from typing import List, Optional, Tuple
 import pandas as pd
     ]
     return "\n\n".join(latex_tables)
+def with_preamble(table_string: str) -> str:
+    preamble_string = [
+        r"\usepackage{breqn}",
+        r"\usepackage{booktabs}",
+        "",
+        "...",
+        "",
+        table_string,
+    ]
+    return "\n".join(preamble_string)

pysr/export_numpy.py CHANGED Viewed

@@ -1,9 +1,12 @@
 """Code for exporting discovered expressions to numpy"""
 import warnings
 import numpy as np
 import pandas as pd
-from sympy import lambdify
 def sympy2numpy(eqn, sympy_symbols, *, selection=None):
@@ -13,6 +16,10 @@ def sympy2numpy(eqn, sympy_symbols, *, selection=None):
 class CallableEquation:
     """Simple wrapper for numpy lambda functions built with sympy"""
     def __init__(self, eqn, sympy_symbols, selection=None):
         self._sympy = eqn
         self._sympy_symbols = sympy_symbols
@@ -28,8 +35,9 @@ class CallableEquation:
             return self._lambda(
                 **{k: X[k].values for k in map(str, self._sympy_symbols)}
             ) * np.ones(expected_shape)
         if self._selection is not None:
-            if X.shape[1] != len(self._selection):
                 warnings.warn(
                     "`X` should be of shape (n_samples, len(self._selection)). "
                     "Automatically filtering `X` to selection. "
@@ -37,6 +45,7 @@ class CallableEquation:
                     "this may lead to incorrect predictions and other errors."
                 )
                 X = X[:, self._selection]
         return self._lambda(*X.T) * np.ones(expected_shape)
     @property

 """Code for exporting discovered expressions to numpy"""
 import warnings
+from typing import List, Union
 import numpy as np
 import pandas as pd
+from numpy.typing import NDArray
+from sympy import Expr, Symbol, lambdify
 def sympy2numpy(eqn, sympy_symbols, *, selection=None):
 class CallableEquation:
     """Simple wrapper for numpy lambda functions built with sympy"""
+    _sympy: Expr
+    _sympy_symbols: List[Symbol]
+    _selection: Union[NDArray[np.bool_], None]
     def __init__(self, eqn, sympy_symbols, selection=None):
         self._sympy = eqn
         self._sympy_symbols = sympy_symbols
             return self._lambda(
                 **{k: X[k].values for k in map(str, self._sympy_symbols)}
             ) * np.ones(expected_shape)
         if self._selection is not None:
+            if X.shape[1] != self._selection.sum():
                 warnings.warn(
                     "`X` should be of shape (n_samples, len(self._selection)). "
                     "Automatically filtering `X` to selection. "
                     "this may lead to incorrect predictions and other errors."
                 )
                 X = X[:, self._selection]
         return self._lambda(*X.T) * np.ones(expected_shape)
     @property

pysr/export_sympy.py CHANGED Viewed

@@ -1,9 +1,12 @@
 """Define utilities to export to sympy"""
 from typing import Callable, Dict, List, Optional
 import sympy
 from sympy import sympify
 sympy_mappings = {
     "div": lambda x, y: x / y,
     "mult": lambda x, y: x * y,
@@ -29,8 +32,8 @@ sympy_mappings = {
     "acosh": lambda x: sympy.acosh(x),
     "acosh_abs": lambda x: sympy.acosh(abs(x) + 1),
     "asinh": sympy.asinh,
-    "atanh": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - 1),
-    "atanh_clip": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - 1),
     "abs": abs,
     "mod": sympy.Mod,
     "erf": sympy.erf,
@@ -50,6 +53,7 @@ sympy_mappings = {
     "round": lambda x: sympy.ceiling(x - 0.5),
     "max": lambda x, y: sympy.Piecewise((y, x < y), (x, True)),
     "min": lambda x, y: sympy.Piecewise((x, x < y), (y, True)),
     "cond": lambda x, y: sympy.Piecewise((y, x > 0), (0.0, True)),
     "logical_or": lambda x, y: sympy.Piecewise((1.0, (x > 0) | (y > 0)), (0.0, True)),
     "logical_and": lambda x, y: sympy.Piecewise((1.0, (x > 0) & (y > 0)), (0.0, True)),
@@ -58,13 +62,13 @@ sympy_mappings = {
 def create_sympy_symbols_map(
-    feature_names_in: List[str],
 ) -> Dict[str, sympy.Symbol]:
     return {variable: sympy.Symbol(variable) for variable in feature_names_in}
 def create_sympy_symbols(
-    feature_names_in: List[str],
 ) -> List[sympy.Symbol]:
     return [sympy.Symbol(variable) for variable in feature_names_in]
@@ -72,7 +76,7 @@ def create_sympy_symbols(
 def pysr2sympy(
     equation: str,
     *,
-    feature_names_in: Optional[List[str]] = None,
     extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
 ):
     if feature_names_in is None:
@@ -83,7 +87,12 @@ def pysr2sympy(
         **sympy_mappings,
     }
-    return sympify(equation, locals=local_sympy_mappings)
 def assert_valid_sympy_symbol(var_name: str) -> None:

 """Define utilities to export to sympy"""
 from typing import Callable, Dict, List, Optional
 import sympy
 from sympy import sympify
+from .utils import ArrayLike
 sympy_mappings = {
     "div": lambda x, y: x / y,
     "mult": lambda x, y: x * y,
     "acosh": lambda x: sympy.acosh(x),
     "acosh_abs": lambda x: sympy.acosh(abs(x) + 1),
     "asinh": sympy.asinh,
+    "atanh": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - sympy.S(1)),
+    "atanh_clip": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - sympy.S(1)),
     "abs": abs,
     "mod": sympy.Mod,
     "erf": sympy.erf,
     "round": lambda x: sympy.ceiling(x - 0.5),
     "max": lambda x, y: sympy.Piecewise((y, x < y), (x, True)),
     "min": lambda x, y: sympy.Piecewise((x, x < y), (y, True)),
+    "greater": lambda x, y: sympy.Piecewise((1.0, x > y), (0.0, True)),
     "cond": lambda x, y: sympy.Piecewise((y, x > 0), (0.0, True)),
     "logical_or": lambda x, y: sympy.Piecewise((1.0, (x > 0) | (y > 0)), (0.0, True)),
     "logical_and": lambda x, y: sympy.Piecewise((1.0, (x > 0) & (y > 0)), (0.0, True)),
 def create_sympy_symbols_map(
+    feature_names_in: ArrayLike[str],
 ) -> Dict[str, sympy.Symbol]:
     return {variable: sympy.Symbol(variable) for variable in feature_names_in}
 def create_sympy_symbols(
+    feature_names_in: ArrayLike[str],
 ) -> List[sympy.Symbol]:
     return [sympy.Symbol(variable) for variable in feature_names_in]
 def pysr2sympy(
     equation: str,
     *,
+    feature_names_in: Optional[ArrayLike[str]] = None,
     extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
 ):
     if feature_names_in is None:
         **sympy_mappings,
     }
+    try:
+        return sympify(equation, locals=local_sympy_mappings, evaluate=False)
+    except TypeError as e:
+        if "got an unexpected keyword argument 'evaluate'" in str(e):
+            return sympify(equation, locals=local_sympy_mappings)
+        raise TypeError(f"Error processing equation '{equation}'") from e
 def assert_valid_sympy_symbol(var_name: str) -> None:

pysr/export_torch.py CHANGED Viewed

@@ -1,11 +1,9 @@
-#####
-# From https://github.com/patrick-kidger/sympytorch
-# Copied here to allow PySR-specific tweaks
-#####
 import collections as co
 import functools as ft
 import sympy
@@ -84,7 +82,7 @@ def _initialize_torch():
         }
         class _Node(torch.nn.Module):
-            """SympyTorch code from https://github.com/patrick-kidger/sympytorch"""
             def __init__(self, *, expr, _memodict, _func_lookup, **kwargs):
                 super().__init__(**kwargs)
@@ -116,6 +114,11 @@ def _initialize_torch():
                     self._value = int(expr)
                     self._torch_func = lambda: self._value
                     self._args = ()
                 elif issubclass(expr.func, sympy.Symbol):
                     self._name = expr.name
                     self._torch_func = lambda value: value
@@ -156,7 +159,7 @@ def _initialize_torch():
                 return self._torch_func(*args)
         class _SingleSymPyModule(torch.nn.Module):
-            """SympyTorch code from https://github.com/patrick-kidger/sympytorch"""
             def __init__(
                 self, expression, symbols_in, selection=None, extra_funcs=None, **kwargs

+# Fork of https://github.com/patrick-kidger/sympytorch
 import collections as co
 import functools as ft
+import numpy as np  # noqa: F401
 import sympy
         }
         class _Node(torch.nn.Module):
+            """Forked from https://github.com/patrick-kidger/sympytorch"""
             def __init__(self, *, expr, _memodict, _func_lookup, **kwargs):
                 super().__init__(**kwargs)
                     self._value = int(expr)
                     self._torch_func = lambda: self._value
                     self._args = ()
+                elif issubclass(expr.func, sympy.NumberSymbol):
+                    # Can get here from exp(1) or exact pi
+                    self._value = float(expr)
+                    self._torch_func = lambda: self._value
+                    self._args = ()
                 elif issubclass(expr.func, sympy.Symbol):
                     self._name = expr.name
                     self._torch_func = lambda value: value
                 return self._torch_func(*args)
         class _SingleSymPyModule(torch.nn.Module):
+            """Forked from https://github.com/patrick-kidger/sympytorch"""
             def __init__(
                 self, expression, symbols_in, selection=None, extra_funcs=None, **kwargs

pysr/feature_selection.py CHANGED Viewed

@@ -1,8 +1,20 @@
 """Functions for doing feature selection during preprocessing."""
 import numpy as np
-def run_feature_selection(X, y, select_k_features, random_state=None):
     """
     Find most important features.
@@ -20,11 +32,16 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
     selector = SelectFromModel(
         clf, threshold=-np.inf, max_features=select_k_features, prefit=True
     )
-    return selector.get_support(indices=True)
 # This function is retained only because the module tests still use it
-def _handle_feature_selection(X, select_k_features, y, variable_names):
     if select_k_features is not None:
         selection = run_feature_selection(X, y, select_k_features)
         print(f"Using features {[variable_names[i] for i in selection]}")

 """Functions for doing feature selection during preprocessing."""
+from typing import Optional, cast
 import numpy as np
+from numpy import ndarray
+from numpy.typing import NDArray
+from .utils import ArrayLike
+def run_feature_selection(
+    X: ndarray,
+    y: ndarray,
+    select_k_features: int,
+    random_state: Optional[np.random.RandomState] = None,
+) -> NDArray[np.bool_]:
     """
     Find most important features.
     selector = SelectFromModel(
         clf, threshold=-np.inf, max_features=select_k_features, prefit=True
     )
+    return cast(NDArray[np.bool_], selector.get_support(indices=False))
 # This function is retained only because the module tests still use it
+def _handle_feature_selection(
+    X: ndarray,
+    select_k_features: Optional[int],
+    y: ndarray,
+    variable_names: ArrayLike[str],
+):
     if select_k_features is not None:
         selection = run_feature_selection(X, y, select_k_features)
         print(f"Using features {[variable_names[i] for i in selection]}")

pysr/julia_helpers.py CHANGED Viewed

@@ -1,11 +1,16 @@
 """Functions for initializing the Julia environment and installing deps."""
 import numpy as np
 from juliacall import convert as jl_convert  # type: ignore
 from .deprecated import init_julia, install
 from .julia_import import jl
 jl.seval("using Serialization: Serialization")
 jl.seval("using PythonCall: PythonCall")
@@ -22,24 +27,31 @@ def _escape_filename(filename):
     return str_repr
-def _load_cluster_manager(cluster_manager):
     jl.seval(f"using ClusterManagers: addprocs_{cluster_manager}")
     return jl.seval(f"addprocs_{cluster_manager}")
-def jl_array(x):
     if x is None:
         return None
-    return jl_convert(jl.Array, x)
-def jl_serialize(obj):
     buf = jl.IOBuffer()
     Serialization.serialize(buf, obj)
     return np.array(jl.take_b(buf))
-def jl_deserialize(s):
     if s is None:
         return s
     buf = jl.IOBuffer()

 """Functions for initializing the Julia environment and installing deps."""
+from typing import Any, Callable, Union, cast
 import numpy as np
 from juliacall import convert as jl_convert  # type: ignore
+from numpy.typing import NDArray
 from .deprecated import init_julia, install
 from .julia_import import jl
+jl_convert = cast(Callable[[Any, Any], Any], jl_convert)
 jl.seval("using Serialization: Serialization")
 jl.seval("using PythonCall: PythonCall")
     return str_repr
+def _load_cluster_manager(cluster_manager: str):
     jl.seval(f"using ClusterManagers: addprocs_{cluster_manager}")
     return jl.seval(f"addprocs_{cluster_manager}")
+def jl_array(x, dtype=None):
     if x is None:
         return None
+    elif dtype is None:
+        return jl_convert(jl.Array, x)
+    else:
+        return jl_convert(jl.Array[dtype], x)
+def jl_is_function(f) -> bool:
+    return cast(bool, jl.seval("op -> op isa Function")(f))
+def jl_serialize(obj: Any) -> NDArray[np.uint8]:
     buf = jl.IOBuffer()
     Serialization.serialize(buf, obj)
     return np.array(jl.take_b(buf))
+def jl_deserialize(s: Union[NDArray[np.uint8], None]):
     if s is None:
         return s
     buf = jl.IOBuffer()

pysr/julia_import.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import os
 import sys
 import warnings
 # Check if JuliaCall is already loaded, and if so, warn the user
 # about the relevant environment variables. If not loaded,
@@ -35,31 +37,17 @@ else:
         os.environ[k] = os.environ.get(k, default)
-from juliacall import Main as jl  # type: ignore
-jl_version = (jl.VERSION.major, jl.VERSION.minor, jl.VERSION.patch)
-# Next, automatically load the juliacall extension if we're in a Jupyter notebook
-autoload_extensions = os.environ.get("PYSR_AUTOLOAD_EXTENSIONS", "yes")
-if autoload_extensions in {"yes", ""} and jl_version >= (1, 9, 0):
-    try:
-        get_ipython = sys.modules["IPython"].get_ipython
-        if "IPKernelApp" not in get_ipython().config:
-            raise ImportError("console")
-        print(
-            "Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable."
-        )
-        # TODO: Turn this off if juliacall does this automatically
-        get_ipython().run_line_magic("load_ext", "juliacall")
-    except Exception:
-        pass
-elif autoload_extensions not in {"no", "yes", ""}:
-    warnings.warn(
-        "PYSR_AUTOLOAD_EXTENSIONS environment variable is set to something other than 'yes' or 'no' or ''."
-    )
 jl.seval("using SymbolicRegression")
 SymbolicRegression = jl.SymbolicRegression

 import os
 import sys
 import warnings
+from types import ModuleType
+from typing import cast
 # Check if JuliaCall is already loaded, and if so, warn the user
 # about the relevant environment variables. If not loaded,
         os.environ[k] = os.environ.get(k, default)
+autoload_extensions = os.environ.get("PYSR_AUTOLOAD_EXTENSIONS")
+if autoload_extensions is not None:
+    # PYSR_AUTOLOAD_EXTENSIONS is deprecated, so just forward its value to juliacall
+    os.environ["PYTHON_JULIACALL_AUTOLOAD_IPYTHON_EXTENSION"] = autoload_extensions
+from juliacall import Main as jl  # type: ignore
+jl = cast(ModuleType, jl)
+jl_version = (jl.VERSION.major, jl.VERSION.minor, jl.VERSION.patch)
 jl.seval("using SymbolicRegression")
 SymbolicRegression = jl.SymbolicRegression

pysr/juliapkg.json CHANGED Viewed

@@ -3,7 +3,7 @@
     "packages": {
         "SymbolicRegression": {
             "uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
-            "version": "=0.24.1"
         },
         "Serialization": {
             "uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",

     "packages": {
         "SymbolicRegression": {
             "uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
+            "version": "=0.24.5"
         },
         "Serialization": {
             "uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",

pysr/param_groupings.yml CHANGED Viewed

@@ -14,6 +14,7 @@
     - loss_function
     - model_selection
     - dimensional_constraint_penalty
   - Working with Complexities:
     - parsimony
     - constraints

     - loss_function
     - model_selection
     - dimensional_constraint_penalty
+    - dimensionless_constants_only
   - Working with Complexities:
     - parsimony
     - constraints

pysr/sklearn_monkeypatch.py CHANGED Viewed

@@ -3,8 +3,7 @@
 from sklearn.utils import validation
-def _ensure_no_complex_data(*args, **kwargs):
-    ...
 try:

 from sklearn.utils import validation
+def _ensure_no_complex_data(*args, **kwargs): ...
 try:

pysr/sr.py CHANGED Viewed

@@ -8,27 +8,31 @@ import shutil
 import sys
 import tempfile
 import warnings
 from datetime import datetime
 from io import StringIO
 from multiprocessing import cpu_count
 from pathlib import Path
-from typing import Callable, Dict, List, Optional, Tuple, Union
-if sys.version_info >= (3, 8):
-    from typing import Literal
-else:
-    from typing_extensions import Literal
 import numpy as np
 import pandas as pd
 from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
 from sklearn.utils import check_array, check_consistent_length, check_random_state
-from sklearn.utils.validation import _check_feature_names_in, check_is_fitted
 from .denoising import denoise, multi_denoise
 from .deprecated import DEPRECATED_KWARGS
 from .export_jax import sympy2jax
-from .export_latex import sympy2latex, sympy2latextable, sympy2multilatextable
 from .export_numpy import sympy2numpy
 from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2sympy
 from .export_torch import sympy2torch
@@ -40,17 +44,21 @@ from .julia_helpers import (
     _load_cluster_manager,
     jl_array,
     jl_deserialize,
     jl_serialize,
 )
 from .julia_import import SymbolicRegression, jl
 from .utils import (
     _csv_filename_to_pkl_filename,
     _preprocess_julia_floats,
     _safe_check_feature_names_in,
     _subscriptify,
 )
-already_ran = False
 def _process_constraints(binary_operators, unary_operators, constraints):
@@ -113,7 +121,7 @@ def _maybe_create_inline_operators(
                         "and underscores are allowed."
                     )
                 if (extra_sympy_mappings is None) or (
-                    not function_name in extra_sympy_mappings
                 ):
                     raise ValueError(
                         f"Custom function {function_name} is not defined in `extra_sympy_mappings`. "
@@ -130,6 +138,7 @@ def _check_assertions(
     X,
     use_custom_variable_names,
     variable_names,
     weights,
     y,
     X_units,
@@ -154,6 +163,13 @@ def _check_assertions(
                     "and underscores are allowed."
                 )
             assert_valid_sympy_symbol(var_name)
     if X_units is not None and len(X_units) != X.shape[1]:
         raise ValueError(
             "The number of units in `X_units` must equal the number of features in `X`."
@@ -178,6 +194,21 @@ def _check_assertions(
 VALID_OPTIMIZER_ALGORITHMS = ["BFGS", "NelderMead"]
 class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     """
     High-performance symbolic regression algorithm.
@@ -309,7 +340,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         `idx` argument to the function, which is `nothing`
         for non-batched, and a 1D array of indices for batched.
         Default is `None`.
-    complexity_of_operators : dict[str, float]
         If you would like to use a complexity other than 1 for an
         operator, specify the complexity here. For example,
         `{"sin": 2, "+": 1}` would give a complexity of 2 for each use
@@ -318,16 +349,22 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         numbers for a complexity, and the total complexity of a tree
         will be rounded to the nearest integer after computing.
         Default is `None`.
-    complexity_of_constants : float
         Complexity of constants. Default is `1`.
-    complexity_of_variables : float
-        Complexity of variables. Default is `1`.
     parsimony : float
         Multiplicative factor for how much to punish complexity.
         Default is `0.0032`.
     dimensional_constraint_penalty : float
         Additive penalty applied if dimensional analysis of an expression fails.
         By default, this is `1000.0`.
     use_frequency : bool
         Whether to measure the frequency of complexities, and use that
         instead of parsimony to explore equation space. Will naturally
@@ -603,22 +640,17 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Units of each variable in the training dataset, `y`.
     nout_ : int
         Number of output dimensions.
-    selection_mask_ : list[int] of length `select_k_features`
-        List of indices for input features that are selected when
-        `select_k_features` is set.
     tempdir_ : Path
         Path to the temporary equations directory.
-    equation_file_ : str
         Output equation file name produced by the julia backend.
     julia_state_stream_ : ndarray
         The serialized state for the julia SymbolicRegression.jl backend (after fitting),
         stored as an array of uint8, produced by Julia's Serialization.serialize function.
-    julia_state_
-        The deserialized state.
     julia_options_stream_ : ndarray
         The serialized julia options, stored as an array of uint8,
-    julia_options_
-        The deserialized julia options.
     equation_file_contents_ : list[pandas.DataFrame]
         Contents of the equation file output by the Julia backend.
     show_pickle_warnings_ : bool
@@ -665,6 +697,22 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     ```
     """
     def __init__(
         self,
         model_selection: Literal["best", "accuracy", "score"] = "best",
@@ -685,9 +733,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         loss_function: Optional[str] = None,
         complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
         complexity_of_constants: Union[int, float] = 1,
-        complexity_of_variables: Union[int, float] = 1,
         parsimony: float = 0.0032,
         dimensional_constraint_penalty: Optional[float] = None,
         use_frequency: bool = True,
         use_frequency_in_tournament: bool = True,
         adaptive_parsimony_scaling: float = 20.0,
@@ -783,6 +832,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         self.complexity_of_variables = complexity_of_variables
         self.parsimony = parsimony
         self.dimensional_constraint_penalty = dimensional_constraint_penalty
         self.use_frequency = use_frequency
         self.use_frequency_in_tournament = use_frequency_in_tournament
         self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
@@ -863,15 +913,15 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                     updated_kwarg_name = DEPRECATED_KWARGS[k]
                     setattr(self, updated_kwarg_name, v)
                     warnings.warn(
-                        f"{k} has been renamed to {updated_kwarg_name} in PySRRegressor. "
                         "Please use that instead.",
                         FutureWarning,
                     )
                 # Handle kwargs that have been moved to the fit method
                 elif k in ["weights", "variable_names", "Xresampled"]:
                     warnings.warn(
-                        f"{k} is a data dependant parameter so should be passed when fit is called. "
-                        f"Ignoring parameter; please pass {k} during the call to fit instead.",
                         FutureWarning,
                     )
                 elif k == "julia_project":
@@ -888,21 +938,25 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                         FutureWarning,
                     )
                 else:
-                    raise TypeError(
-                        f"{k} is not a valid keyword argument for PySRRegressor."
                     )
     @classmethod
     def from_file(
         cls,
-        equation_file,
         *,
-        binary_operators=None,
-        unary_operators=None,
-        n_features_in=None,
-        feature_names_in=None,
-        selection_mask=None,
-        nout=1,
         verbosity=1,
         **pysr_kwargs,
     ):
@@ -911,7 +965,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Parameters
         ----------
-        equation_file : str
             Path to a pickle file containing a saved model, or a csv file
             containing equations.
         binary_operators : list[str]
@@ -926,8 +980,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         feature_names_in : list[str]
             Names of the features passed to the model.
             Not needed if loading from a pickle file.
-        selection_mask : list[bool]
-            If using select_k_features, you must pass `model.selection_mask_` here.
             Not needed if loading from a pickle file.
         nout : int
             Number of outputs of the model.
@@ -983,7 +1037,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         # TODO: copy .bkup file if exists.
         model = cls(
-            equation_file=equation_file,
             binary_operators=binary_operators,
             unary_operators=unary_operators,
             **pysr_kwargs,
@@ -1003,7 +1057,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             model.display_feature_names_in_ = feature_names_in
         if selection_mask is None:
-            model.selection_mask_ = np.ones(n_features_in, dtype=bool)
         else:
             model.selection_mask_ = selection_mask
@@ -1030,7 +1084,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             all_equations = equations
         for i, equations in enumerate(all_equations):
-            selected = ["" for _ in range(len(equations))]
             chosen_row = idx_model_selection(equations, self.model_selection)
             selected[chosen_row] = ">>>>"
             repr_equations = pd.DataFrame(
@@ -1063,15 +1117,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Handle pickle serialization for PySRRegressor.
         The Scikit-learn standard requires estimators to be serializable via
-        `pickle.dumps()`. However, `PyCall.jlwrap` does not support pickle
-        serialization.
-        Thus, for `PySRRegressor` to support pickle serialization, the
-        `julia_state_stream_` attribute must be hidden from pickle. This will
-        prevent the `warm_start` of any model that is loaded via `pickle.loads()`,
-        but does allow all other attributes of a fitted `PySRRegressor` estimator
-        to be serialized. Note: Jax and Torch format equations are also removed
-        from the pickled instance.
         """
         state = self.__dict__
         show_pickle_warning = not (
@@ -1137,10 +1184,12 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     @property
     def julia_options_(self):
         return jl_deserialize(self.julia_options_stream_)
     @property
     def julia_state_(self):
         return jl_deserialize(self.julia_state_stream_)
     @property
@@ -1153,7 +1202,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         )
         return self.julia_state_
-    def get_best(self, index=None):
         """
         Get best equation using `model_selection`.
@@ -1176,8 +1225,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             Raised when an invalid model selection strategy is provided.
         """
         check_is_fitted(self, attributes=["equations_"])
-        if self.equations_ is None:
-            raise ValueError("No equations have been generated yet.")
         if index is not None:
             if isinstance(self.equations_, list):
@@ -1185,16 +1232,21 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                     index, list
                 ), "With multiple output features, index must be a list."
                 return [eq.iloc[i] for eq, i in zip(self.equations_, index)]
-            return self.equations_.iloc[index]
         if isinstance(self.equations_, list):
             return [
-                eq.iloc[idx_model_selection(eq, self.model_selection)]
                 for eq in self.equations_
             ]
-        return self.equations_.iloc[
-            idx_model_selection(self.equations_, self.model_selection)
-        ]
     def _setup_equation_file(self):
         """
@@ -1219,7 +1271,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             self.equation_file_ = self.equation_file
         self.equation_file_contents_ = None
-    def _validate_and_set_init_params(self):
         """
         Ensure parameters passed at initialization are valid.
@@ -1277,59 +1329,57 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                 f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}"
             )
-        progress = self.progress
-        # 'Mutable' parameter validation
-        #  (Params and their default values, if None is given:)
-        default_param_mapping = {
-            "binary_operators": "+ * - /".split(" "),
-            "unary_operators": [],
-            "maxdepth": self.maxsize,
-            "constraints": {},
-            "multithreading": self.procs != 0 and self.cluster_manager is None,
-            "batch_size": 1,
-            "update_verbosity": int(self.verbosity),
-            "progress": progress,
-        }
-        packed_modified_params = {}
-        for parameter, default_value in default_param_mapping.items():
-            parameter_value = getattr(self, parameter)
-            if parameter_value is None:
-                parameter_value = default_value
             else:
-                # Special cases such as when binary_operators is a string
-                if parameter in ["binary_operators", "unary_operators"] and isinstance(
-                    parameter_value, str
-                ):
-                    parameter_value = [parameter_value]
-                elif parameter == "batch_size" and parameter_value < 1:
-                    warnings.warn(
-                        "Given `batch_size` must be greater than or equal to one. "
-                        "`batch_size` has been increased to equal one."
-                    )
-                    parameter_value = 1
-                elif (
-                    parameter == "progress"
-                    and parameter_value
-                    and "buffer" not in sys.stdout.__dir__()
-                ):
-                    warnings.warn(
-                        "Note: it looks like you are running in Jupyter. "
-                        "The progress bar will be turned off."
-                    )
-                    parameter_value = False
-            packed_modified_params[parameter] = parameter_value
         assert (
-            len(packed_modified_params["binary_operators"])
-            + len(packed_modified_params["unary_operators"])
-            > 0
-        )
-        return packed_modified_params
     def _validate_and_set_fit_params(
-        self, X, y, Xresampled, weights, variable_names, X_units, y_units
-    ):
         """
         Validate the parameters passed to the :term:`fit` method.
@@ -1349,12 +1399,14 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             Weight array of the same shape as `y`.
             Each element is how to weight the mean-square-error loss
             for that particular element of y.
-        variable_names : list[str] of length n_features
-            Names of each variable in the training dataset, `X`.
         X_units : list[str] of length n_features
-            Units of each variable in the training dataset, `X`.
         y_units : str | list[str] of length n_out
-            Units of each variable in the training dataset, `y`.
         Returns
         -------
@@ -1398,6 +1450,22 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                 "Please use valid names instead."
             )
         # Data validation and feature name fetching via sklearn
         # This method sets the n_features_in_ attribute
         if Xresampled is not None:
@@ -1405,7 +1473,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         if weights is not None:
             weights = check_array(weights, ensure_2d=False)
             check_consistent_length(weights, y)
-        X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
         self.feature_names_in_ = _safe_check_feature_names_in(
             self, variable_names, generate_names=False
         )
@@ -1415,10 +1483,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             self.display_feature_names_in_ = np.array(
                 [f"x{_subscriptify(i)}" for i in range(X.shape[1])]
             )
         else:
             self.display_feature_names_in_ = self.feature_names_in_
-        variable_names = self.feature_names_in_
         # Handle multioutput data
         if len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1):
@@ -1428,13 +1496,39 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         else:
             raise NotImplementedError("y shape not supported!")
         self.X_units_ = copy.deepcopy(X_units)
         self.y_units_ = copy.deepcopy(y_units)
-        return X, y, Xresampled, weights, variable_names, X_units, y_units
     def _pre_transform_training_data(
-        self, X, y, Xresampled, variable_names, X_units, y_units, random_state
     ):
         """
         Transform the training data before fitting the symbolic regressor.
@@ -1443,17 +1537,19 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Parameters
         ----------
-        X : ndarray | pandas.DataFrame
             Training data of shape (n_samples, n_features).
-        y : ndarray | pandas.DataFrame
             Target values of shape (n_samples,) or (n_samples, n_targets).
             Will be cast to X's dtype if necessary.
-        Xresampled : ndarray | pandas.DataFrame
             Resampled training data, of shape `(n_resampled, n_features)`,
             used for denoising.
         variable_names : list[str]
             Names of each variable in the training dataset, `X`.
             Of length `n_features`.
         X_units : list[str]
             Units of each variable in the training dataset, `X`.
         y_units : str | list[str]
@@ -1486,24 +1582,43 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         """
         # Feature selection transformation
         if self.select_k_features:
-            self.selection_mask_ = run_feature_selection(
                 X, y, self.select_k_features, random_state=random_state
             )
-            X = X[:, self.selection_mask_]
             if Xresampled is not None:
-                Xresampled = Xresampled[:, self.selection_mask_]
             # Reduce variable_names to selection
-            variable_names = [variable_names[i] for i in self.selection_mask_]
             if X_units is not None:
-                X_units = [X_units[i] for i in self.selection_mask_]
                 self.X_units_ = copy.deepcopy(X_units)
             # Re-perform data validation and feature name updating
-            X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
             # Update feature names with selected variable names
             self.feature_names_in_ = _check_feature_names_in(self, variable_names)
             self.display_feature_names_in_ = self.feature_names_in_
             print(f"Using features {self.feature_names_in_}")
@@ -1517,22 +1632,29 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             else:
                 X, y = denoise(X, y, Xresampled=Xresampled, random_state=random_state)
-        return X, y, variable_names, X_units, y_units
-    def _run(self, X, y, mutated_params, weights, seed):
         """
         Run the symbolic regression fitting process on the julia backend.
         Parameters
         ----------
-        X : ndarray | pandas.DataFrame
             Training data of shape `(n_samples, n_features)`.
-        y : ndarray | pandas.DataFrame
             Target values of shape `(n_samples,)` or `(n_samples, n_targets)`.
             Will be cast to `X`'s dtype if necessary.
-        mutated_params : dict[str, Any]
-            Dictionary of mutated versions of some parameters passed in __init__.
-        weights : ndarray | pandas.DataFrame
             Weight array of the same shape as `y`.
             Each element is how to weight the mean-square-error loss
             for that particular element of y.
@@ -1551,24 +1673,27 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         """
         # Need to be global as we don't want to recreate/reinstate julia for
         # every new instance of PySRRegressor
-        global already_ran
         # These are the parameters which may be modified from the ones
         # specified in init, so we define them here locally:
-        binary_operators = mutated_params["binary_operators"]
-        unary_operators = mutated_params["unary_operators"]
-        maxdepth = mutated_params["maxdepth"]
-        constraints = mutated_params["constraints"]
         nested_constraints = self.nested_constraints
         complexity_of_operators = self.complexity_of_operators
-        multithreading = mutated_params["multithreading"]
         cluster_manager = self.cluster_manager
-        batch_size = mutated_params["batch_size"]
-        update_verbosity = mutated_params["update_verbosity"]
-        progress = mutated_params["progress"]
         # Start julia backend processes
-        if not already_ran and update_verbosity != 0:
             print("Compiling Julia backend...")
         if cluster_manager is not None:
@@ -1607,6 +1732,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                 complexity_of_operators_str += f"({k}) => {v}, "
             complexity_of_operators_str += ")"
             complexity_of_operators = jl.seval(complexity_of_operators_str)
         custom_loss = jl.seval(
             str(self.elementwise_loss)
@@ -1643,16 +1772,30 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             optimize=self.weight_optimize,
         )
         # Call to Julia backend.
         # See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl
         options = SymbolicRegression.Options(
-            binary_operators=jl.seval(str(binary_operators).replace("'", "")),
-            unary_operators=jl.seval(str(unary_operators).replace("'", "")),
             bin_constraints=jl_array(bin_constraints),
             una_constraints=jl_array(una_constraints),
             complexity_of_operators=complexity_of_operators,
             complexity_of_constants=self.complexity_of_constants,
-            complexity_of_variables=self.complexity_of_variables,
             nested_constraints=nested_constraints,
             elementwise_loss=custom_loss,
             loss_function=custom_full_objective,
@@ -1667,6 +1810,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             # These have the same name:
             parsimony=self.parsimony,
             dimensional_constraint_penalty=self.dimensional_constraint_penalty,
             alpha=self.alpha,
             maxdepth=maxdepth,
             fast_cycle=self.fast_cycle,
@@ -1678,9 +1822,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             fraction_replaced_hof=self.fraction_replaced_hof,
             should_simplify=self.should_simplify,
             should_optimize_constants=self.should_optimize_constants,
-            warmup_maxsize_by=(
-                0.0 if self.warmup_maxsize_by is None else self.warmup_maxsize_by
-            ),
             use_frequency=self.use_frequency,
             use_frequency_in_tournament=self.use_frequency_in_tournament,
             adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
@@ -1787,7 +1929,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         if self.delete_tempfiles:
             shutil.rmtree(self.tempdir_)
-        already_ran = True
         return self
@@ -1797,9 +1939,12 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         y,
         Xresampled=None,
         weights=None,
-        variable_names: Optional[List[str]] = None,
-        X_units: Optional[List[str]] = None,
-        y_units: Optional[List[str]] = None,
     ) -> "PySRRegressor":
         """
         Search for equations to fit the dataset and store them in `self.equations_`.
@@ -1858,15 +2003,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             self.selection_mask_ = None
             self.julia_state_stream_ = None
             self.julia_options_stream_ = None
             self.X_units_ = None
             self.y_units_ = None
-        random_state = check_random_state(self.random_state)  # For np random
-        seed = random_state.get_state()[1][0]  # For julia random
         self._setup_equation_file()
-        mutated_params = self._validate_and_set_init_params()
         (
             X,
@@ -1874,10 +2017,18 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             Xresampled,
             weights,
             variable_names,
             X_units,
             y_units,
         ) = self._validate_and_set_fit_params(
-            X, y, Xresampled, weights, variable_names, X_units, y_units
         )
         if X.shape[0] > 10000 and not self.batching:
@@ -1891,9 +2042,21 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                 "More datapoints will lower the search speed."
             )
         # Pre transformations (feature selection and denoising)
-        X, y, variable_names, X_units, y_units = self._pre_transform_training_data(
-            X, y, Xresampled, variable_names, X_units, y_units, random_state
         )
         # Warn about large feature counts (still warn if feature count is large
@@ -1903,13 +2066,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                 "Note: you are running with 10 features or more. "
                 "Genetic algorithms like used in PySR scale poorly with large numbers of features. "
                 "You should run PySR for more `niterations` to ensure it can find "
-                "the correct variables, "
-                "or, alternatively, do a dimensionality reduction beforehand. "
-                "For example, `X = PCA(n_components=6).fit_transform(X)`, "
-                "using scikit-learn's `PCA` class, "
-                "will reduce the number of features to 6 in an interpretable way, "
-                "as each resultant feature "
-                "will be a linear combination of the original features. "
             )
         # Assertion checks
@@ -1920,6 +2077,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             X,
             use_custom_variable_names,
             variable_names,
             weights,
             y,
             X_units,
@@ -1932,7 +2090,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             self._checkpoint()
         # Perform the search:
-        self._run(X, y, mutated_params, weights=weights, seed=seed)
         # Then, after fit, we save again, so the pickle file contains
         # the equations:
@@ -1941,7 +2099,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         return self
-    def refresh(self, checkpoint_file=None):
         """
         Update self.equations_ with any new options passed.
@@ -1950,11 +2108,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Parameters
         ----------
-        checkpoint_file : str
             Path to checkpoint hall of fame file to be loaded.
             The default will use the set `equation_file_`.
         """
-        if checkpoint_file:
             self.equation_file_ = checkpoint_file
             self.equation_file_contents_ = None
         check_is_fitted(self, attributes=["equation_file_"])
@@ -2006,7 +2164,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             if self.selection_mask_ is not None:
                 # RangeIndex enforces column order allowing columns to
                 # be correctly filtered with self.selection_mask_
-                X = X.iloc[:, self.selection_mask_]
             X.columns = self.feature_names_in_
         # Without feature information, CallableEquation/lambda_format equations
         # require that the column order of X matches that of the X used during
@@ -2016,14 +2174,16 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         # reordered/reindexed to match those of the transformed (denoised and
         # feature selected) X in fit.
         X = X.reindex(columns=self.feature_names_in_)
-        X = self._validate_data(X, reset=False)
         try:
-            if self.nout_ > 1:
                 return np.stack(
                     [eq["lambda_format"](X) for eq in best_equation], axis=1
                 )
-            return best_equation["lambda_format"](X)
         except Exception as error:
             raise ValueError(
                 "Failed to evaluate the expression. "
@@ -2053,9 +2213,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         """
         self.refresh()
         best_equation = self.get_best(index=index)
-        if self.nout_ > 1:
             return [eq["sympy_format"] for eq in best_equation]
-        return best_equation["sympy_format"]
     def latex(self, index=None, precision=3):
         """
@@ -2115,9 +2277,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         self.set_params(output_jax_format=True)
         self.refresh()
         best_equation = self.get_best(index=index)
-        if self.nout_ > 1:
             return [eq["jax_format"] for eq in best_equation]
-        return best_equation["jax_format"]
     def pytorch(self, index=None):
         """
@@ -2145,9 +2309,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         self.set_params(output_torch_format=True)
         self.refresh()
         best_equation = self.get_best(index=index)
-        if self.nout_ > 1:
             return [eq["torch_format"] for eq in best_equation]
-        return best_equation["torch_format"]
     def _read_equation_file(self):
         """Read the hall of fame file created by `SymbolicRegression.jl`."""
@@ -2246,10 +2411,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             lastComplexity = 0
             sympy_format = []
             lambda_format = []
-            if self.output_jax_format:
-                jax_format = []
-            if self.output_torch_format:
-                torch_format = []
             for _, eqn_row in output.iterrows():
                 eqn = pysr2sympy(
@@ -2361,7 +2524,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         """
         self.refresh()
-        if self.nout_ > 1:
             if indices is not None:
                 assert isinstance(indices, list)
                 assert isinstance(indices[0], list)
@@ -2370,7 +2533,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             table_string = sympy2multilatextable(
                 self.equations_, indices=indices, precision=precision, columns=columns
             )
-        else:
             if indices is not None:
                 assert isinstance(indices, list)
                 assert isinstance(indices[0], int)
@@ -2378,15 +2541,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             table_string = sympy2latextable(
                 self.equations_, indices=indices, precision=precision, columns=columns
             )
-        preamble_string = [
-            r"\usepackage{breqn}",
-            r"\usepackage{booktabs}",
-            "",
-            "...",
-            "",
-        ]
-        return "\n".join(preamble_string + [table_string])
 def idx_model_selection(equations: pd.DataFrame, model_selection: str):
@@ -2404,3 +2565,30 @@ def idx_model_selection(equations: pd.DataFrame, model_selection: str):
             f"{model_selection} is not a valid model selection strategy."
         )
     return chosen_idx

 import sys
 import tempfile
 import warnings
+from dataclasses import dataclass, fields
 from datetime import datetime
 from io import StringIO
 from multiprocessing import cpu_count
 from pathlib import Path
+from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast
 import numpy as np
 import pandas as pd
+from numpy import ndarray
+from numpy.typing import NDArray
 from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
 from sklearn.utils import check_array, check_consistent_length, check_random_state
+from sklearn.utils.validation import _check_feature_names_in  # type: ignore
+from sklearn.utils.validation import check_is_fitted
 from .denoising import denoise, multi_denoise
 from .deprecated import DEPRECATED_KWARGS
 from .export_jax import sympy2jax
+from .export_latex import (
+    sympy2latex,
+    sympy2latextable,
+    sympy2multilatextable,
+    with_preamble,
+)
 from .export_numpy import sympy2numpy
 from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2sympy
 from .export_torch import sympy2torch
     _load_cluster_manager,
     jl_array,
     jl_deserialize,
+    jl_is_function,
     jl_serialize,
 )
 from .julia_import import SymbolicRegression, jl
 from .utils import (
+    ArrayLike,
+    PathLike,
     _csv_filename_to_pkl_filename,
     _preprocess_julia_floats,
     _safe_check_feature_names_in,
     _subscriptify,
+    _suggest_keywords,
 )
+ALREADY_RAN = False
 def _process_constraints(binary_operators, unary_operators, constraints):
                         "and underscores are allowed."
                     )
                 if (extra_sympy_mappings is None) or (
+                    function_name not in extra_sympy_mappings
                 ):
                     raise ValueError(
                         f"Custom function {function_name} is not defined in `extra_sympy_mappings`. "
     X,
     use_custom_variable_names,
     variable_names,
+    complexity_of_variables,
     weights,
     y,
     X_units,
                     "and underscores are allowed."
                 )
             assert_valid_sympy_symbol(var_name)
+    if (
+        isinstance(complexity_of_variables, list)
+        and len(complexity_of_variables) != X.shape[1]
+    ):
+        raise ValueError(
+            "The number of elements in `complexity_of_variables` must equal the number of features in `X`."
+        )
     if X_units is not None and len(X_units) != X.shape[1]:
         raise ValueError(
             "The number of units in `X_units` must equal the number of features in `X`."
 VALID_OPTIMIZER_ALGORITHMS = ["BFGS", "NelderMead"]
+@dataclass
+class _DynamicallySetParams:
+    """Container for parameters whose final values are resolved at runtime.
+
+    `PySRRegressor._validate_and_modify_params` builds an instance with the
+    fallback values noted on each field below, then overrides every field
+    whose same-named estimator attribute is not `None` (after passing the
+    user value through `_mutate_parameter`).
+    """
+    # Fallback: ["+", "*", "-", "/"].
+    binary_operators: List[str]
+    # Fallback: [] (no unary operators).
+    unary_operators: List[str]
+    # Fallback: the estimator's `maxsize`.
+    maxdepth: int
+    # Fallback: {} (no constraints).
+    constraints: Dict[str, str]
+    # Fallback: `procs != 0 and cluster_manager is None`.
+    multithreading: bool
+    # Fallback: 1.
+    batch_size: int
+    # Fallback: `int(verbosity)`.
+    update_verbosity: int
+    # Fallback: the estimator's `progress`.
+    progress: bool
+    # Fallback: 0.0.
+    warmup_maxsize_by: float
 class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     """
     High-performance symbolic regression algorithm.
         `idx` argument to the function, which is `nothing`
         for non-batched, and a 1D array of indices for batched.
         Default is `None`.
+    complexity_of_operators : dict[str, Union[int, float]]
         If you would like to use a complexity other than 1 for an
         operator, specify the complexity here. For example,
         `{"sin": 2, "+": 1}` would give a complexity of 2 for each use
         numbers for a complexity, and the total complexity of a tree
         will be rounded to the nearest integer after computing.
         Default is `None`.
+    complexity_of_constants : int | float
         Complexity of constants. Default is `1`.
+    complexity_of_variables : int | float
+        Global complexity of variables. To set different complexities for
+        different variables, pass a list of complexities to the `fit` method
+        with keyword `complexity_of_variables`. You cannot use both.
+        Default is `1`.
     parsimony : float
         Multiplicative factor for how much to punish complexity.
         Default is `0.0032`.
     dimensional_constraint_penalty : float
         Additive penalty for if dimensional analysis of an expression fails.
         By default, this is `1000.0`.
+    dimensionless_constants_only : bool
+        Whether to only search for dimensionless constants, if using units.
+        Default is `False`.
     use_frequency : bool
         Whether to measure the frequency of complexities, and use that
         instead of parsimony to explore equation space. Will naturally
         Units of each variable in the training dataset, `y`.
     nout_ : int
         Number of output dimensions.
+    selection_mask_ : ndarray of shape (`n_features_in_`,)
+        Mask of which features of `X` to use when `select_k_features` is set.
     tempdir_ : Path
         Path to the temporary equations directory.
+    equation_file_ : Union[str, Path]
         Output equation file name produced by the julia backend.
     julia_state_stream_ : ndarray
         The serialized state for the julia SymbolicRegression.jl backend (after fitting),
         stored as an array of uint8, produced by Julia's Serialization.serialize function.
     julia_options_stream_ : ndarray
         The serialized julia options, stored as an array of uint8,
     equation_file_contents_ : list[pandas.DataFrame]
         Contents of the equation file output by the Julia backend.
     show_pickle_warnings_ : bool
     ```
     """
+    equations_: Union[pd.DataFrame, List[pd.DataFrame], None]
+    n_features_in_: int
+    feature_names_in_: ArrayLike[str]
+    display_feature_names_in_: ArrayLike[str]
+    complexity_of_variables_: Union[int, float, List[Union[int, float]], None]
+    X_units_: Union[ArrayLike[str], None]
+    y_units_: Union[str, ArrayLike[str], None]
+    nout_: int
+    selection_mask_: Union[NDArray[np.bool_], None]
+    tempdir_: Path
+    equation_file_: PathLike
+    julia_state_stream_: Union[NDArray[np.uint8], None]
+    julia_options_stream_: Union[NDArray[np.uint8], None]
+    equation_file_contents_: Union[List[pd.DataFrame], None]
+    show_pickle_warnings_: bool
     def __init__(
         self,
         model_selection: Literal["best", "accuracy", "score"] = "best",
         loss_function: Optional[str] = None,
         complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
         complexity_of_constants: Union[int, float] = 1,
+        complexity_of_variables: Optional[Union[int, float]] = None,
         parsimony: float = 0.0032,
         dimensional_constraint_penalty: Optional[float] = None,
+        dimensionless_constants_only: bool = False,
         use_frequency: bool = True,
         use_frequency_in_tournament: bool = True,
         adaptive_parsimony_scaling: float = 20.0,
         self.complexity_of_variables = complexity_of_variables
         self.parsimony = parsimony
         self.dimensional_constraint_penalty = dimensional_constraint_penalty
+        self.dimensionless_constants_only = dimensionless_constants_only
         self.use_frequency = use_frequency
         self.use_frequency_in_tournament = use_frequency_in_tournament
         self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
                     updated_kwarg_name = DEPRECATED_KWARGS[k]
                     setattr(self, updated_kwarg_name, v)
                     warnings.warn(
+                        f"`{k}` has been renamed to `{updated_kwarg_name}` in PySRRegressor. "
                         "Please use that instead.",
                         FutureWarning,
                     )
                 # Handle kwargs that have been moved to the fit method
                 elif k in ["weights", "variable_names", "Xresampled"]:
                     warnings.warn(
+                        f"`{k}` is a data-dependent parameter and should be passed when fit is called. "
+                        f"Ignoring parameter; please pass `{k}` during the call to fit instead.",
                         FutureWarning,
                     )
                 elif k == "julia_project":
                         FutureWarning,
                     )
                 else:
+                    suggested_keywords = _suggest_keywords(PySRRegressor, k)
+                    err_msg = (
+                        f"`{k}` is not a valid keyword argument for PySRRegressor."
                     )
+                    if len(suggested_keywords) > 0:
+                        err_msg += f" Did you mean {', '.join(map(lambda s: f'`{s}`', suggested_keywords))}?"
+                    raise TypeError(err_msg)
     @classmethod
     def from_file(
         cls,
+        equation_file: PathLike,
         *,
+        binary_operators: Optional[List[str]] = None,
+        unary_operators: Optional[List[str]] = None,
+        n_features_in: Optional[int] = None,
+        feature_names_in: Optional[ArrayLike[str]] = None,
+        selection_mask: Optional[NDArray[np.bool_]] = None,
+        nout: int = 1,
         verbosity=1,
         **pysr_kwargs,
     ):
         Parameters
         ----------
+        equation_file : str or Path
             Path to a pickle file containing a saved model, or a csv file
             containing equations.
         binary_operators : list[str]
         feature_names_in : list[str]
             Names of the features passed to the model.
             Not needed if loading from a pickle file.
+        selection_mask : NDArray[np.bool_]
+            If using `select_k_features`, you must pass `model.selection_mask_` here.
             Not needed if loading from a pickle file.
         nout : int
             Number of outputs of the model.
         # TODO: copy .bkup file if exists.
         model = cls(
+            equation_file=str(equation_file),
             binary_operators=binary_operators,
             unary_operators=unary_operators,
             **pysr_kwargs,
             model.display_feature_names_in_ = feature_names_in
         if selection_mask is None:
+            model.selection_mask_ = np.ones(n_features_in, dtype=np.bool_)
         else:
             model.selection_mask_ = selection_mask
             all_equations = equations
         for i, equations in enumerate(all_equations):
+            selected = pd.Series([""] * len(equations), index=equations.index)
             chosen_row = idx_model_selection(equations, self.model_selection)
             selected[chosen_row] = ">>>>"
             repr_equations = pd.DataFrame(
         Handle pickle serialization for PySRRegressor.
         The Scikit-learn standard requires estimators to be serializable via
+        `pickle.dumps()`. However, some attributes do not support pickling
+        and need to be hidden, such as the JAX and Torch representations.
         """
         state = self.__dict__
         show_pickle_warning = not (
     @property
     def julia_options_(self):
+        """The julia options, deserialized from `julia_options_stream_`."""
         return jl_deserialize(self.julia_options_stream_)
     @property
     def julia_state_(self):
+        """The julia backend state, deserialized from `julia_state_stream_`."""
         return jl_deserialize(self.julia_state_stream_)
     @property
         )
         return self.julia_state_
+    def get_best(self, index=None) -> Union[pd.Series, List[pd.Series]]:
         """
         Get best equation using `model_selection`.
             Raised when an invalid model selection strategy is provided.
         """
         check_is_fitted(self, attributes=["equations_"])
         if index is not None:
             if isinstance(self.equations_, list):
                     index, list
                 ), "With multiple output features, index must be a list."
                 return [eq.iloc[i] for eq, i in zip(self.equations_, index)]
+            else:
+                equations_ = cast(pd.DataFrame, self.equations_)
+                return cast(pd.Series, equations_.iloc[index])
         if isinstance(self.equations_, list):
             return [
+                cast(pd.Series, eq.loc[idx_model_selection(eq, self.model_selection)])
                 for eq in self.equations_
             ]
+        else:
+            equations_ = cast(pd.DataFrame, self.equations_)
+            return cast(
+                pd.Series,
+                equations_.loc[idx_model_selection(equations_, self.model_selection)],
+            )
     def _setup_equation_file(self):
         """
             self.equation_file_ = self.equation_file
         self.equation_file_contents_ = None
+    def _validate_and_modify_params(self) -> _DynamicallySetParams:
         """
         Ensure parameters passed at initialization are valid.
                 f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}"
             )
+        param_container = _DynamicallySetParams(
+            binary_operators=["+", "*", "-", "/"],
+            unary_operators=[],
+            maxdepth=self.maxsize,
+            constraints={},
+            multithreading=self.procs != 0 and self.cluster_manager is None,
+            batch_size=1,
+            update_verbosity=int(self.verbosity),
+            progress=self.progress,
+            warmup_maxsize_by=0.0,
+        )
+        for param_name in map(lambda x: x.name, fields(_DynamicallySetParams)):
+            user_param_value = getattr(self, param_name)
+            if user_param_value is None:
+                # Leave as the default in DynamicallySetParams
+                ...
             else:
+                # If user has specified it, we will override the default.
+                # However, there are some special cases to mutate it:
+                new_param_value = _mutate_parameter(param_name, user_param_value)
+                setattr(param_container, param_name, new_param_value)
+        # TODO: This should just be part of the __init__ of _DynamicallySetParams
         assert (
+            len(param_container.binary_operators) > 0
+            or len(param_container.unary_operators) > 0
+        ), "At least one operator must be provided."
+        return param_container
     def _validate_and_set_fit_params(
+        self,
+        X,
+        y,
+        Xresampled,
+        weights,
+        variable_names,
+        complexity_of_variables,
+        X_units,
+        y_units,
+    ) -> Tuple[
+        ndarray,
+        ndarray,
+        Optional[ndarray],
+        Optional[ndarray],
+        ArrayLike[str],
+        Union[int, float, List[Union[int, float]]],
+        Optional[ArrayLike[str]],
+        Optional[Union[str, ArrayLike[str]]],
+    ]:
         """
         Validate the parameters passed to the :term`fit` method.
             Weight array of the same shape as `y`.
             Each element is how to weight the mean-square-error loss
             for that particular element of y.
+        variable_names : ndarray of length n_features
+            Names of each feature in the training dataset, `X`.
+        complexity_of_variables : int | float | list[int | float]
+            Complexity of each feature in the training dataset, `X`.
         X_units : list[str] of length n_features
+            Units of each feature in the training dataset, `X`.
         y_units : str | list[str] of length n_out
+            Units of each feature in the training dataset, `y`.
         Returns
         -------
                 "Please use valid names instead."
             )
+        if (
+            complexity_of_variables is not None
+            and self.complexity_of_variables is not None
+        ):
+            raise ValueError(
+                "You cannot set `complexity_of_variables` at both `fit` and `__init__`. "
+                "Pass it at `__init__` to set it to global default, OR use `fit` to set it for "
+                "each variable individually."
+            )
+        elif complexity_of_variables is not None:
+            complexity_of_variables = complexity_of_variables
+        elif self.complexity_of_variables is not None:
+            complexity_of_variables = self.complexity_of_variables
+        else:
+            complexity_of_variables = 1
         # Data validation and feature name fetching via sklearn
         # This method sets the n_features_in_ attribute
         if Xresampled is not None:
         if weights is not None:
             weights = check_array(weights, ensure_2d=False)
             check_consistent_length(weights, y)
+        X, y = self._validate_data_X_y(X, y)
         self.feature_names_in_ = _safe_check_feature_names_in(
             self, variable_names, generate_names=False
         )
             self.display_feature_names_in_ = np.array(
                 [f"x{_subscriptify(i)}" for i in range(X.shape[1])]
             )
+            variable_names = self.feature_names_in_
         else:
             self.display_feature_names_in_ = self.feature_names_in_
+            variable_names = self.feature_names_in_
         # Handle multioutput data
         if len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1):
         else:
             raise NotImplementedError("y shape not supported!")
+        self.complexity_of_variables_ = copy.deepcopy(complexity_of_variables)
         self.X_units_ = copy.deepcopy(X_units)
         self.y_units_ = copy.deepcopy(y_units)
+        return (
+            X,
+            y,
+            Xresampled,
+            weights,
+            variable_names,
+            complexity_of_variables,
+            X_units,
+            y_units,
+        )
+    def _validate_data_X_y(self, X, y) -> Tuple[ndarray, ndarray]:
+        """Validate `X` and `y` together and return them as an `(X, y)` pair.
+
+        Delegates to `self._validate_data` with `reset=True` and
+        `multi_output=True`. The `cast` only narrows the type for static
+        checkers; it does nothing at runtime.
+        """
+        return cast(
+            Tuple[ndarray, ndarray],
+            self._validate_data(X=X, y=y, reset=True, multi_output=True),  # type: ignore
+        )
+    def _validate_data_X(self, X) -> ndarray:
+        """Validate `X` on its own and return the validated array.
+
+        Delegates to `self._validate_data` with `reset=False`. Since no `y`
+        is passed, the validator returns a single array rather than a tuple,
+        so the result is annotated (and cast) as a plain `ndarray`. The
+        `cast` only informs static checkers; it does nothing at runtime.
+        """
+        raw_out = self._validate_data(X=X, reset=False)  # type: ignore
+        return cast(ndarray, raw_out)
     def _pre_transform_training_data(
+        self,
+        X: ndarray,
+        y: ndarray,
+        Xresampled: Union[ndarray, None],
+        variable_names: ArrayLike[str],
+        complexity_of_variables: Union[int, float, List[Union[int, float]]],
+        X_units: Union[ArrayLike[str], None],
+        y_units: Union[ArrayLike[str], str, None],
+        random_state: np.random.RandomState,
     ):
         """
         Transform the training data before fitting the symbolic regressor.
         Parameters
         ----------
+        X : ndarray
             Training data of shape (n_samples, n_features).
+        y : ndarray
             Target values of shape (n_samples,) or (n_samples, n_targets).
             Will be cast to X's dtype if necessary.
+        Xresampled : ndarray | None
             Resampled training data, of shape `(n_resampled, n_features)`,
             used for denoising.
         variable_names : list[str]
             Names of each variable in the training dataset, `X`.
             Of length `n_features`.
+        complexity_of_variables : int | float | list[int | float]
+            Complexity of each variable in the training dataset, `X`.
         X_units : list[str]
             Units of each variable in the training dataset, `X`.
         y_units : str | list[str]
         """
         # Feature selection transformation
         if self.select_k_features:
+            selection_mask = run_feature_selection(
                 X, y, self.select_k_features, random_state=random_state
             )
+            X = X[:, selection_mask]
             if Xresampled is not None:
+                Xresampled = Xresampled[:, selection_mask]
             # Reduce variable_names to selection
+            variable_names = cast(
+                ArrayLike[str],
+                [
+                    variable_names[i]
+                    for i in range(len(variable_names))
+                    if selection_mask[i]
+                ],
+            )
+            if isinstance(complexity_of_variables, list):
+                complexity_of_variables = [
+                    complexity_of_variables[i]
+                    for i in range(len(complexity_of_variables))
+                    if selection_mask[i]
+                ]
+                self.complexity_of_variables_ = copy.deepcopy(complexity_of_variables)
             if X_units is not None:
+                X_units = cast(
+                    ArrayLike[str],
+                    [X_units[i] for i in range(len(X_units)) if selection_mask[i]],
+                )
                 self.X_units_ = copy.deepcopy(X_units)
             # Re-perform data validation and feature name updating
+            X, y = self._validate_data_X_y(X, y)
             # Update feature names with selected variable names
+            self.selection_mask_ = selection_mask
             self.feature_names_in_ = _check_feature_names_in(self, variable_names)
             self.display_feature_names_in_ = self.feature_names_in_
             print(f"Using features {self.feature_names_in_}")
             else:
                 X, y = denoise(X, y, Xresampled=Xresampled, random_state=random_state)
+        return X, y, variable_names, complexity_of_variables, X_units, y_units
+    def _run(
+        self,
+        X: ndarray,
+        y: ndarray,
+        runtime_params: _DynamicallySetParams,
+        weights: Optional[ndarray],
+        seed: int,
+    ):
         """
         Run the symbolic regression fitting process on the julia backend.
         Parameters
         ----------
+        X : ndarray
             Training data of shape `(n_samples, n_features)`.
+        y : ndarray
             Target values of shape `(n_samples,)` or `(n_samples, n_targets)`.
             Will be cast to `X`'s dtype if necessary.
+        runtime_params : DynamicallySetParams
+            Dynamically set versions of some parameters passed in __init__.
+        weights : ndarray | None
             Weight array of the same shape as `y`.
             Each element is how to weight the mean-square-error loss
             for that particular element of y.
         """
         # Need to be global as we don't want to recreate/reinstate julia for
         # every new instance of PySRRegressor
+        global ALREADY_RAN
         # These are the parameters which may be modified from the ones
         # specified in init, so we define them here locally:
+        binary_operators = runtime_params.binary_operators
+        unary_operators = runtime_params.unary_operators
+        maxdepth = runtime_params.maxdepth
+        constraints = runtime_params.constraints
+        multithreading = runtime_params.multithreading
+        batch_size = runtime_params.batch_size
+        update_verbosity = runtime_params.update_verbosity
+        progress = runtime_params.progress
+        warmup_maxsize_by = runtime_params.warmup_maxsize_by
         nested_constraints = self.nested_constraints
         complexity_of_operators = self.complexity_of_operators
+        complexity_of_variables = self.complexity_of_variables_
         cluster_manager = self.cluster_manager
         # Start julia backend processes
+        if not ALREADY_RAN and update_verbosity != 0:
             print("Compiling Julia backend...")
         if cluster_manager is not None:
                 complexity_of_operators_str += f"({k}) => {v}, "
             complexity_of_operators_str += ")"
             complexity_of_operators = jl.seval(complexity_of_operators_str)
+        # TODO: Refactor this into helper function
+        if isinstance(complexity_of_variables, list):
+            complexity_of_variables = jl_array(complexity_of_variables)
         custom_loss = jl.seval(
             str(self.elementwise_loss)
             optimize=self.weight_optimize,
         )
+        jl_binary_operators: List[Any] = []
+        jl_unary_operators: List[Any] = []
+        for input_list, output_list, name in [
+            (binary_operators, jl_binary_operators, "binary"),
+            (unary_operators, jl_unary_operators, "unary"),
+        ]:
+            for op in input_list:
+                jl_op = jl.seval(op)
+                if not jl_is_function(jl_op):
+                    raise ValueError(
+                        f"When building `{name}_operators`, `'{op}'` did not return a Julia function"
+                    )
+                output_list.append(jl_op)
         # Call to Julia backend.
         # See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl
         options = SymbolicRegression.Options(
+            binary_operators=jl_array(jl_binary_operators, dtype=jl.Function),
+            unary_operators=jl_array(jl_unary_operators, dtype=jl.Function),
             bin_constraints=jl_array(bin_constraints),
             una_constraints=jl_array(una_constraints),
             complexity_of_operators=complexity_of_operators,
             complexity_of_constants=self.complexity_of_constants,
+            complexity_of_variables=complexity_of_variables,
             nested_constraints=nested_constraints,
             elementwise_loss=custom_loss,
             loss_function=custom_full_objective,
             # These have the same name:
             parsimony=self.parsimony,
             dimensional_constraint_penalty=self.dimensional_constraint_penalty,
+            dimensionless_constants_only=self.dimensionless_constants_only,
             alpha=self.alpha,
             maxdepth=maxdepth,
             fast_cycle=self.fast_cycle,
             fraction_replaced_hof=self.fraction_replaced_hof,
             should_simplify=self.should_simplify,
             should_optimize_constants=self.should_optimize_constants,
+            warmup_maxsize_by=warmup_maxsize_by,
             use_frequency=self.use_frequency,
             use_frequency_in_tournament=self.use_frequency_in_tournament,
             adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
         if self.delete_tempfiles:
             shutil.rmtree(self.tempdir_)
+        ALREADY_RAN = True
         return self
         y,
         Xresampled=None,
         weights=None,
+        variable_names: Optional[ArrayLike[str]] = None,
+        complexity_of_variables: Optional[
+            Union[int, float, List[Union[int, float]]]
+        ] = None,
+        X_units: Optional[ArrayLike[str]] = None,
+        y_units: Optional[Union[str, ArrayLike[str]]] = None,
     ) -> "PySRRegressor":
         """
         Search for equations to fit the dataset and store them in `self.equations_`.
             self.selection_mask_ = None
             self.julia_state_stream_ = None
             self.julia_options_stream_ = None
+            self.complexity_of_variables_ = None
             self.X_units_ = None
             self.y_units_ = None
         self._setup_equation_file()
+        runtime_params = self._validate_and_modify_params()
         (
             X,
             Xresampled,
             weights,
             variable_names,
+            complexity_of_variables,
             X_units,
             y_units,
         ) = self._validate_and_set_fit_params(
+            X,
+            y,
+            Xresampled,
+            weights,
+            variable_names,
+            complexity_of_variables,
+            X_units,
+            y_units,
         )
         if X.shape[0] > 10000 and not self.batching:
                 "More datapoints will lower the search speed."
             )
+        random_state = check_random_state(self.random_state)  # For np random
+        seed = cast(int, random_state.randint(0, 2**31 - 1))  # For julia random
         # Pre transformations (feature selection and denoising)
+        X, y, variable_names, complexity_of_variables, X_units, y_units = (
+            self._pre_transform_training_data(
+                X,
+                y,
+                Xresampled,
+                variable_names,
+                complexity_of_variables,
+                X_units,
+                y_units,
+                random_state,
+            )
         )
         # Warn about large feature counts (still warn if feature count is large
                 "Note: you are running with 10 features or more. "
                 "Genetic algorithms like used in PySR scale poorly with large numbers of features. "
                 "You should run PySR for more `niterations` to ensure it can find "
+                "the correct variables, and consider using a larger `maxsize`."
             )
         # Assertion checks
             X,
             use_custom_variable_names,
             variable_names,
+            complexity_of_variables,
             weights,
             y,
             X_units,
             self._checkpoint()
         # Perform the search:
+        self._run(X, y, runtime_params, weights=weights, seed=seed)
         # Then, after fit, we save again, so the pickle file contains
         # the equations:
         return self
+    def refresh(self, checkpoint_file: Optional[PathLike] = None) -> None:
         """
         Update self.equations_ with any new options passed.
         Parameters
         ----------
+        checkpoint_file : str or Path
             Path to checkpoint hall of fame file to be loaded.
             The default will use the set `equation_file_`.
         """
+        if checkpoint_file is not None:
             self.equation_file_ = checkpoint_file
             self.equation_file_contents_ = None
         check_is_fitted(self, attributes=["equation_file_"])
             if self.selection_mask_ is not None:
                 # RangeIndex enforces column order allowing columns to
                 # be correctly filtered with self.selection_mask_
+                X = X[X.columns[self.selection_mask_]]
             X.columns = self.feature_names_in_
         # Without feature information, CallableEquation/lambda_format equations
         # require that the column order of X matches that of the X used during
         # reordered/reindexed to match those of the transformed (denoised and
         # feature selected) X in fit.
         X = X.reindex(columns=self.feature_names_in_)
+        X = self._validate_data_X(X)
         try:
+            if isinstance(best_equation, list):
+                assert self.nout_ > 1
                 return np.stack(
                     [eq["lambda_format"](X) for eq in best_equation], axis=1
                 )
+            else:
+                return best_equation["lambda_format"](X)
         except Exception as error:
             raise ValueError(
                 "Failed to evaluate the expression. "
         """
         self.refresh()
         best_equation = self.get_best(index=index)
+        if isinstance(best_equation, list):
+            assert self.nout_ > 1
             return [eq["sympy_format"] for eq in best_equation]
+        else:
+            return best_equation["sympy_format"]
     def latex(self, index=None, precision=3):
         """
         self.set_params(output_jax_format=True)
         self.refresh()
         best_equation = self.get_best(index=index)
+        if isinstance(best_equation, list):
+            assert self.nout_ > 1
             return [eq["jax_format"] for eq in best_equation]
+        else:
+            return best_equation["jax_format"]
     def pytorch(self, index=None):
         """
         self.set_params(output_torch_format=True)
         self.refresh()
         best_equation = self.get_best(index=index)
+        if isinstance(best_equation, list):
             return [eq["torch_format"] for eq in best_equation]
+        else:
+            return best_equation["torch_format"]
     def _read_equation_file(self):
         """Read the hall of fame file created by `SymbolicRegression.jl`."""
             lastComplexity = 0
             sympy_format = []
             lambda_format = []
+            jax_format = []
+            torch_format = []
             for _, eqn_row in output.iterrows():
                 eqn = pysr2sympy(
         """
         self.refresh()
+        if isinstance(self.equations_, list):
             if indices is not None:
                 assert isinstance(indices, list)
                 assert isinstance(indices[0], list)
             table_string = sympy2multilatextable(
                 self.equations_, indices=indices, precision=precision, columns=columns
             )
+        elif isinstance(self.equations_, pd.DataFrame):
             if indices is not None:
                 assert isinstance(indices, list)
                 assert isinstance(indices[0], int)
             table_string = sympy2latextable(
                 self.equations_, indices=indices, precision=precision, columns=columns
             )
+        else:
+            raise ValueError(
+                "Invalid type for equations_ to pass to `latex_table`. "
+                "Expected a DataFrame or a list of DataFrames."
+            )
+        return with_preamble(table_string)
 def idx_model_selection(equations: pd.DataFrame, model_selection: str):
             f"{model_selection} is not a valid model selection strategy."
         )
     return chosen_idx
def _mutate_parameter(param_name: str, param_value):
    """Return a normalized version of a single named parameter value.

    Parameters
    ----------
    param_name : str
        Name of the parameter being inspected.
    param_value : Any
        Value supplied for that parameter.

    Returns
    -------
    Any
        `param_value` unchanged, except in these cases:

        - `binary_operators` / `unary_operators` given as a single string
          are wrapped in a one-element list.
        - `batch_size` below one is replaced by `1` (a warning is issued).
        - a truthy `progress` is replaced by `False` when `sys.stdout` does
          not list a `buffer` attribute (a warning is issued).
    """
    if param_name in ("binary_operators", "unary_operators") and isinstance(
        param_value, str
    ):
        return [param_value]

    if param_name == "batch_size" and param_value < 1:
        warnings.warn(
            "Given `batch_size` must be greater than or equal to one. "
            "`batch_size` has been increased to equal one."
        )
        return 1

    # A stdout object that does not expose `buffer` is taken as the sign of a
    # Jupyter-style stream (see the warning text below). Any truthy `progress`
    # counts as "enabled", not only values that compare equal to `True`.
    if (
        param_name == "progress"
        and param_value
        and "buffer" not in dir(sys.stdout)
    ):
        warnings.warn(
            "Note: it looks like you are running in Jupyter. "
            "The progress bar will be turned off."
        )
        return False

    return param_value

pysr/test/__main__.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """CLI for running PySR's test suite."""
 import argparse
 from . import *

 """CLI for running PySR's test suite."""
 import argparse
 from . import *

pysr/test/params.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import inspect
-from .. import PySRRegressor
 DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
 DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default

 import inspect
+from pysr import PySRRegressor
 DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
 DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default

pysr/test/test.py CHANGED Viewed

@@ -11,12 +11,18 @@ import pandas as pd
 import sympy
 from sklearn.utils.estimator_checks import check_estimator
-from .. import PySRRegressor, install, jl
-from ..export_latex import sympy2latex
-from ..feature_selection import _handle_feature_selection, run_feature_selection
-from ..julia_helpers import init_julia
-from ..sr import _check_assertions, _process_constraints, idx_model_selection
-from ..utils import _csv_filename_to_pkl_filename
 from .params import (
     DEFAULT_NCYCLES,
     DEFAULT_NITERATIONS,
@@ -24,6 +30,11 @@ from .params import (
     DEFAULT_POPULATIONS,
 )
 class TestPipeline(unittest.TestCase):
     def setUp(self):
@@ -171,6 +182,63 @@ class TestPipeline(unittest.TestCase):
         self.assertLessEqual(mse1, 1e-4)
         self.assertLessEqual(mse2, 1e-4)
     def test_multioutput_weighted_with_callable_temp_equation(self):
         X = self.X.copy()
         y = X[:, [0, 1]] ** 2
@@ -308,7 +376,10 @@ class TestPipeline(unittest.TestCase):
                 "unused_feature": self.rstate.randn(500),
             }
         )
-        true_fn = lambda x: np.array(x["T"] + x["x"] ** 2 + 1.323837)
         y = true_fn(X)
         noise = self.rstate.randn(500) * 0.01
         y = y + noise
@@ -367,13 +438,12 @@ class TestPipeline(unittest.TestCase):
     def test_load_model(self):
         """See if we can load a ran model from the equation file."""
-        csv_file_data = """
-        Complexity,Loss,Equation
         1,0.19951081,"1.9762075"
         3,0.12717344,"(f0 + 1.4724599)"
         4,0.104823045,"pow_abs(2.2683423, cos(f3))\""""
         # Strip the indents:
-        csv_file_data = "\n".join([l.strip() for l in csv_file_data.split("\n")])
         for from_backup in [False, True]:
             rand_dir = Path(tempfile.mkdtemp())
@@ -425,12 +495,22 @@ class TestPipeline(unittest.TestCase):
             if os.path.exists(file_to_delete):
                 os.remove(file_to_delete)
-        pickle_file = rand_dir / "equations.pkl"
         model3 = PySRRegressor.from_file(
             model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
         )
         np.testing.assert_allclose(model.predict(self.X), model3.predict(self.X))
 def manually_create_model(equations, feature_names=None):
     if feature_names is None:
@@ -526,7 +606,7 @@ class TestFeatureSelection(unittest.TestCase):
         X = self.rstate.randn(20000, 5)
         y = X[:, 2] ** 2 + X[:, 3] ** 2
         selected = run_feature_selection(X, y, select_k_features=2)
-        self.assertEqual(sorted(selected), [2, 3])
     def test_feature_selection_handler(self):
         X = self.rstate.randn(20000, 5)
@@ -538,8 +618,8 @@ class TestFeatureSelection(unittest.TestCase):
             variable_names=var_names,
             y=y,
         )
-        self.assertTrue((2 in selection) and (3 in selection))
-        selected_var_names = [var_names[i] for i in selection]
         self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
         np.testing.assert_array_equal(
             np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
@@ -563,6 +643,105 @@ class TestMiscellaneous(unittest.TestCase):
         test_pkl_file = _csv_filename_to_pkl_filename(str(equation_file))
         self.assertEqual(test_pkl_file, str(expected_pkl_file))
     def test_deprecation(self):
         """Ensure that deprecation works as expected.
@@ -705,100 +884,28 @@ class TestMiscellaneous(unittest.TestCase):
                 model.get_best()
                 print("Failed", opt["kwargs"])
-    def test_pickle_with_temp_equation_file(self):
-        """If we have a temporary equation file, unpickle the estimator."""
-        model = PySRRegressor(
-            populations=int(1 + DEFAULT_POPULATIONS / 5),
-            temp_equation_file=True,
-            procs=0,
-            multithreading=False,
         )
-        nout = 3
-        X = np.random.randn(100, 2)
-        y = np.random.randn(100, nout)
-        model.fit(X, y)
-        contents = model.equation_file_contents_.copy()
-        y_predictions = model.predict(X)
-        equation_file_base = model.equation_file_
-        for i in range(1, nout + 1):
-            assert not os.path.exists(str(equation_file_base) + f".out{i}.bkup")
-        with tempfile.NamedTemporaryFile() as pickle_file:
-            pkl.dump(model, pickle_file)
-            pickle_file.seek(0)
-            model2 = pkl.load(pickle_file)
-        contents2 = model2.equation_file_contents_
-        cols_to_check = ["equation", "loss", "complexity"]
-        for frame1, frame2 in zip(contents, contents2):
-            pd.testing.assert_frame_equal(frame1[cols_to_check], frame2[cols_to_check])
-        y_predictions2 = model2.predict(X)
-        np.testing.assert_array_equal(y_predictions, y_predictions2)
-    def test_scikit_learn_compatibility(self):
-        """Test PySRRegressor compatibility with scikit-learn."""
-        model = PySRRegressor(
-            niterations=int(1 + DEFAULT_NITERATIONS / 10),
-            populations=int(1 + DEFAULT_POPULATIONS / 3),
-            ncycles_per_iteration=int(2 + DEFAULT_NCYCLES / 10),
-            verbosity=0,
-            progress=False,
-            random_state=0,
-            deterministic=True,  # Deterministic as tests require this.
-            procs=0,
-            multithreading=False,
-            warm_start=False,
-            temp_equation_file=True,
-        )  # Return early.
-        check_generator = check_estimator(model, generate_only=True)
-        exception_messages = []
-        for _, check in check_generator:
-            if check.func.__name__ == "check_complex_data":
-                # We can use complex data, so avoid this check.
-                continue
-            try:
-                with warnings.catch_warnings():
-                    warnings.simplefilter("ignore")
-                    check(model)
-                print("Passed", check.func.__name__)
-            except Exception:
-                error_message = str(traceback.format_exc())
-                exception_messages.append(
-                    f"{check.func.__name__}:\n" + error_message + "\n"
-                )
-                print("Failed", check.func.__name__, "with:")
-                # Add a leading tab to error message, which
-                # might be multi-line:
-                print("\n".join([(" " * 4) + row for row in error_message.split("\n")]))
-        # If any checks failed don't let the test pass.
-        self.assertEqual(len(exception_messages), 0)
-    def test_param_groupings(self):
-        """Test that param_groupings are complete"""
-        param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
-        if not param_groupings_file.exists():
-            return
-        # Read the file, discarding lines ending in ":",
-        # and removing leading "\s*-\s*":
-        params = []
-        with open(param_groupings_file, "r") as f:
-            for line in f.readlines():
-                if line.strip().endswith(":"):
-                    continue
-                if line.strip().startswith("-"):
-                    params.append(line.strip()[1:].strip())
-        regressor_params = [
-            p for p in DEFAULT_PARAMS.keys() if p not in ["self", "kwargs"]
-        ]
-        # Check the sets are equal:
-        self.assertSetEqual(set(params), set(regressor_params))
 TRUE_PREAMBLE = "\n".join(
@@ -932,7 +1039,7 @@ class TestLaTeXTable(unittest.TestCase):
         middle_part_2 = r"""
             $y_{1} = x_{1}$ & $1$ & $1.32$ & $0.0$ \\
             $y_{1} = \cos{\left(x_{1} \right)}$ & $2$ & $0.0520$ & $3.23$ \\
-            $y_{1} = x_{0}^{2} x_{1}$ & $5$ & $2.00 \cdot 10^{-15}$ & $10.3$ \\
         """
         true_latex_table_str = "\n\n".join(
             self.create_true_latex(part, include_score=True)
@@ -985,7 +1092,7 @@ class TestLaTeXTable(unittest.TestCase):
         middle_part = r"""
         $y = x_{0}$ & $1$ & $1.05$ & $0.0$ \\
         $y = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ & $3.82$ \\
-        \begin{minipage}{0.8\linewidth} \vspace{-1em} \begin{dmath*} y = x_{0}^{5} + x_{0}^{3} + 3.20 x_{0} + x_{1}^{3} - 1.20 x_{1} - 5.20 \sin{\left(2.60 x_{0} - 0.326 \sin{\left(x_{2} \right)} \right)} - \cos{\left(x_{0} x_{1} \right)} + \cos{\left(x_{0}^{3} + 3.20 x_{0} + x_{1}^{3} - 1.20 x_{1} + \cos{\left(x_{0} x_{1} \right)} \right)} \end{dmath*} \end{minipage} & $30$ & $1.12 \cdot 10^{-15}$ & $1.09$ \\
         """
         true_latex_table_str = (
             TRUE_PREAMBLE
@@ -1039,8 +1146,14 @@ class TestDimensionalConstraints(unittest.TestCase):
         """This just checks the number of units passed"""
         use_custom_variable_names = False
         variable_names = None
         weights = None
-        args = (use_custom_variable_names, variable_names, weights)
         valid_units = [
             (np.ones((10, 2)), np.ones(10), ["m/s", "s"], "m"),
             (np.ones((10, 1)), np.ones(10), ["m/s"], None),
@@ -1148,6 +1261,7 @@ def runtests(just_tests=False):
         TestBest,
         TestFeatureSelection,
         TestMiscellaneous,
         TestLaTeXTable,
         TestDimensionalConstraints,
     ]

 import sympy
 from sklearn.utils.estimator_checks import check_estimator
+from pysr import PySRRegressor, install, jl
+from pysr.export_latex import sympy2latex
+from pysr.feature_selection import _handle_feature_selection, run_feature_selection
+from pysr.julia_helpers import init_julia
+from pysr.sr import (
+    _check_assertions,
+    _process_constraints,
+    _suggest_keywords,
+    idx_model_selection,
+)
+from pysr.utils import _csv_filename_to_pkl_filename
 from .params import (
     DEFAULT_NCYCLES,
     DEFAULT_NITERATIONS,
     DEFAULT_POPULATIONS,
 )
# Disables local saving (a value already present in the environment is kept):
if "SYMBOLIC_REGRESSION_IS_TESTING" not in os.environ:
    os.environ["SYMBOLIC_REGRESSION_IS_TESTING"] = "true"
 class TestPipeline(unittest.TestCase):
     def setUp(self):
         self.assertLessEqual(mse1, 1e-4)
         self.assertLessEqual(mse2, 1e-4)
+    def test_custom_variable_complexity(self):
+        for outer in (True, False):
+            for case in (1, 2):
+                y = self.X[:, [0, 1]]
+                if case == 1:
+                    kwargs = dict(complexity_of_variables=[2, 3])
+                elif case == 2:
+                    kwargs = dict(complexity_of_variables=2)
+                if outer:
+                    outer_kwargs = kwargs
+                    inner_kwargs = dict()
+                else:
+                    outer_kwargs = dict()
+                    inner_kwargs = kwargs
+                model = PySRRegressor(
+                    binary_operators=["+"],
+                    verbosity=0,
+                    **self.default_test_kwargs,
+                    early_stop_condition=(
+                        f"stop_if_{case}(l, c) = l < 1e-8 && c <= {3 if case == 1 else 2}"
+                    ),
+                    **outer_kwargs,
+                )
+                model.fit(self.X[:, [0, 1]], y, **inner_kwargs)
+                self.assertLessEqual(model.get_best()[0]["loss"], 1e-8)
+                self.assertLessEqual(model.get_best()[1]["loss"], 1e-8)
+                self.assertEqual(model.get_best()[0]["complexity"], 2)
+                self.assertEqual(
+                    model.get_best()[1]["complexity"], 3 if case == 1 else 2
+                )
+    def test_error_message_custom_variable_complexity(self):
+        X = np.ones((10, 2))
+        y = np.ones((10,))
+        model = PySRRegressor()
+        with self.assertRaises(ValueError) as cm:
+            model.fit(X, y, complexity_of_variables=[1, 2, 3])
+        self.assertIn(
+            "number of elements in `complexity_of_variables`", str(cm.exception)
+        )
+    def test_error_message_both_variable_complexity(self):
+        X = np.ones((10, 2))
+        y = np.ones((10,))
+        model = PySRRegressor(complexity_of_variables=[1, 2])
+        with self.assertRaises(ValueError) as cm:
+            model.fit(X, y, complexity_of_variables=[1, 2, 3])
+        self.assertIn(
+            "You cannot set `complexity_of_variables` at both `fit` and `__init__`.",
+            str(cm.exception),
+        )
     def test_multioutput_weighted_with_callable_temp_equation(self):
         X = self.X.copy()
         y = X[:, [0, 1]] ** 2
                 "unused_feature": self.rstate.randn(500),
             }
         )
+        def true_fn(x):
+            return np.array(x["T"] + x["x"] ** 2 + 1.323837)
         y = true_fn(X)
         noise = self.rstate.randn(500) * 0.01
         y = y + noise
     def test_load_model(self):
         """See if we can load a ran model from the equation file."""
+        csv_file_data = """Complexity,Loss,Equation
         1,0.19951081,"1.9762075"
         3,0.12717344,"(f0 + 1.4724599)"
         4,0.104823045,"pow_abs(2.2683423, cos(f3))\""""
         # Strip the indents:
+        csv_file_data = "\n".join([line.strip() for line in csv_file_data.split("\n")])
         for from_backup in [False, True]:
             rand_dir = Path(tempfile.mkdtemp())
             if os.path.exists(file_to_delete):
                 os.remove(file_to_delete)
+        # pickle_file = rand_dir / "equations.pkl"
         model3 = PySRRegressor.from_file(
             model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
         )
         np.testing.assert_allclose(model.predict(self.X), model3.predict(self.X))
+    def test_jl_function_error(self):
+        # TODO: Move this to better class
+        with self.assertRaises(ValueError) as cm:
+            PySRRegressor(unary_operators=["1"]).fit([[1]], [1])
+        self.assertIn(
+            "When building `unary_operators`, `'1'` did not return a Julia function",
+            str(cm.exception),
+        )
 def manually_create_model(equations, feature_names=None):
     if feature_names is None:
         X = self.rstate.randn(20000, 5)
         y = X[:, 2] ** 2 + X[:, 3] ** 2
         selected = run_feature_selection(X, y, select_k_features=2)
+        np.testing.assert_array_equal(selected, [False, False, True, True, False])
     def test_feature_selection_handler(self):
         X = self.rstate.randn(20000, 5)
             variable_names=var_names,
             y=y,
         )
+        np.testing.assert_array_equal(selection, [False, False, True, True, False])
+        selected_var_names = [var_names[i] for i in range(5) if selection[i]]
         self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
         np.testing.assert_array_equal(
             np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
         test_pkl_file = _csv_filename_to_pkl_filename(str(equation_file))
         self.assertEqual(test_pkl_file, str(expected_pkl_file))
+    def test_pickle_with_temp_equation_file(self):
+        """If we have a temporary equation file, unpickle the estimator."""
+        model = PySRRegressor(
+            populations=int(1 + DEFAULT_POPULATIONS / 5),
+            temp_equation_file=True,
+            procs=0,
+            multithreading=False,
+        )
+        nout = 3
+        X = np.random.randn(100, 2)
+        y = np.random.randn(100, nout)
+        model.fit(X, y)
+        contents = model.equation_file_contents_.copy()
+        y_predictions = model.predict(X)
+        equation_file_base = model.equation_file_
+        for i in range(1, nout + 1):
+            assert not os.path.exists(str(equation_file_base) + f".out{i}.bkup")
+        with tempfile.NamedTemporaryFile() as pickle_file:
+            pkl.dump(model, pickle_file)
+            pickle_file.seek(0)
+            model2 = pkl.load(pickle_file)
+        contents2 = model2.equation_file_contents_
+        cols_to_check = ["equation", "loss", "complexity"]
+        for frame1, frame2 in zip(contents, contents2):
+            pd.testing.assert_frame_equal(frame1[cols_to_check], frame2[cols_to_check])
+        y_predictions2 = model2.predict(X)
+        np.testing.assert_array_almost_equal(y_predictions, y_predictions2)
+    def test_scikit_learn_compatibility(self):
+        """Test PySRRegressor compatibility with scikit-learn.
+
+        Runs every check yielded by `check_estimator` (except
+        `check_complex_data`) and collects the failures, so that a single run
+        reports all failing checks instead of stopping at the first one.
+        """
+        model = PySRRegressor(
+            niterations=int(1 + DEFAULT_NITERATIONS / 10),
+            populations=int(1 + DEFAULT_POPULATIONS / 3),
+            ncycles_per_iteration=int(2 + DEFAULT_NCYCLES / 10),
+            verbosity=0,
+            progress=False,
+            random_state=0,
+            deterministic=True,  # Deterministic as tests require this.
+            procs=0,
+            multithreading=False,
+            warm_start=False,
+            temp_equation_file=True,
+        )  # Return early.
+        check_generator = check_estimator(model, generate_only=True)
+        exception_messages = []
+        for _, check in check_generator:
+            check_name = check.func.__name__
+            if check_name == "check_complex_data":
+                # We can use complex data, so avoid this check.
+                continue
+            try:
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+                    check(model)
+                print("Passed", check_name)
+            except Exception:
+                # Record the failure and keep going so every check gets run.
+                error_message = traceback.format_exc()
+                exception_messages.append(f"{check_name}:\n{error_message}\n")
+                print("Failed", check_name, "with:")
+                # Indent each line of the (possibly multi-line) error
+                # message by four spaces:
+                print("\n".join(" " * 4 + row for row in error_message.split("\n")))
+        # If any checks failed don't let the test pass; attach the collected
+        # tracebacks so the assertion failure itself names the failing checks.
+        self.assertEqual(
+            len(exception_messages), 0, msg="\n".join(exception_messages)
+        )
+    def test_param_groupings(self):
+        """Test that param_groupings are complete.
+
+        Every `- name` entry of `param_groupings.yml` is collected and compared,
+        as a set, against the keys of `DEFAULT_PARAMS` (minus `self`/`kwargs`).
+        The test is skipped when the YAML file is not present.
+        """
+        param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
+        if not param_groupings_file.exists():
+            # Report a skip rather than silently passing without checking anything.
+            self.skipTest(f"{param_groupings_file} not found")
+        # Read the file, discarding lines ending in ":",
+        # and removing leading "\s*-\s*":
+        params = []
+        with open(param_groupings_file, "r") as f:
+            for line in f:
+                entry = line.strip()
+                if entry.endswith(":"):
+                    continue
+                if entry.startswith("-"):
+                    params.append(entry[1:].strip())
+        regressor_params = [p for p in DEFAULT_PARAMS if p not in ("self", "kwargs")]
+        # Check the sets are equal:
+        self.assertSetEqual(set(params), set(regressor_params))
+class TestHelpMessages(unittest.TestCase):
+    """Test user help messages."""
     def test_deprecation(self):
         """Ensure that deprecation works as expected.
                 model.get_best()
                 print("Failed", opt["kwargs"])
+    def test_suggest_keywords(self):
+        """Check keyword suggestions, directly and via the `TypeError` text."""
+        # Easy
+        self.assertEqual(
+            _suggest_keywords(PySRRegressor, "loss_function"), ["loss_function"]
         )
+        # More complex, and with error
+        with self.assertRaises(TypeError) as cm:
+            # The constructor is expected to raise, so its result is not bound.
+            PySRRegressor(ncyclesperiterationn=5)
+        message = str(cm.exception)
+        self.assertIn("`ncyclesperiterationn` is not a valid keyword", message)
+        self.assertIn("Did you mean", message)
+        self.assertIn("`ncycles_per_iteration`, ", message)
+        self.assertIn("`niterations`", message)
+        # Farther matches (this might need to be changed)
+        with self.assertRaises(TypeError) as cm:
+            PySRRegressor(operators=["+", "-"])
+        self.assertIn("`unary_operators`, `binary_operators`", str(cm.exception))
 TRUE_PREAMBLE = "\n".join(
         middle_part_2 = r"""
             $y_{1} = x_{1}$ & $1$ & $1.32$ & $0.0$ \\
             $y_{1} = \cos{\left(x_{1} \right)}$ & $2$ & $0.0520$ & $3.23$ \\
+            $y_{1} = x_{0} x_{0} x_{1}$ & $5$ & $2.00 \cdot 10^{-15}$ & $10.3$ \\
         """
         true_latex_table_str = "\n\n".join(
             self.create_true_latex(part, include_score=True)
         middle_part = r"""
         $y = x_{0}$ & $1$ & $1.05$ & $0.0$ \\
         $y = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ & $3.82$ \\
+        \begin{minipage}{0.8\linewidth} \vspace{-1em} \begin{dmath*} y = x_{0} x_{0} x_{0} + x_{0} x_{0} x_{0} x_{0} x_{0} + 3.20 x_{0} - 1.20 x_{1} + x_{1} x_{1} x_{1} + 5.20 \sin{\left(- 2.60 x_{0} + 0.326 \sin{\left(x_{2} \right)} \right)} - \cos{\left(x_{0} x_{1} \right)} + \cos{\left(x_{0} x_{0} x_{0} + 3.20 x_{0} - 1.20 x_{1} + x_{1} x_{1} x_{1} + \cos{\left(x_{0} x_{1} \right)} \right)} \end{dmath*} \end{minipage} & $30$ & $1.12 \cdot 10^{-15}$ & $1.09$ \\
         """
         true_latex_table_str = (
             TRUE_PREAMBLE
         """This just checks the number of units passed"""
         use_custom_variable_names = False
         variable_names = None
+        complexity_of_variables = 1
         weights = None
+        args = (
+            use_custom_variable_names,
+            variable_names,
+            complexity_of_variables,
+            weights,
+        )
         valid_units = [
             (np.ones((10, 2)), np.ones(10), ["m/s", "s"], "m"),
             (np.ones((10, 1)), np.ones(10), ["m/s"], None),
         TestBest,
         TestFeatureSelection,
         TestMiscellaneous,
+        TestHelpMessages,
         TestLaTeXTable,
         TestDimensionalConstraints,
     ]

pysr/test/test_jax.py CHANGED Viewed

@@ -5,27 +5,29 @@ import numpy as np
 import pandas as pd
 import sympy
-from .. import PySRRegressor, sympy2jax
 class TestJAX(unittest.TestCase):
     def setUp(self):
         np.random.seed(0)
     def test_sympy2jax(self):
-        from jax import numpy as jnp
         from jax import random
         x, y, z = sympy.symbols("x y z")
         cosx = 1.0 * sympy.cos(x) + y
         key = random.PRNGKey(0)
         X = random.normal(key, (1000, 2))
-        true = 1.0 * jnp.cos(X[:, 0]) + X[:, 1]
         f, params = sympy2jax(cosx, [x, y, z])
-        self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
     def test_pipeline_pandas(self):
-        from jax import numpy as jnp
         X = pd.DataFrame(np.random.randn(100, 10))
         y = np.ones(X.shape[0])
@@ -52,14 +54,12 @@ class TestJAX(unittest.TestCase):
         jformat = model.jax()
         np.testing.assert_almost_equal(
-            np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
             np.square(np.cos(X.values[:, 1])),  # Select feature 1
             decimal=3,
         )
     def test_pipeline(self):
-        from jax import numpy as jnp
         X = np.random.randn(100, 10)
         y = np.ones(X.shape[0])
         model = PySRRegressor(progress=False, max_evals=10000, output_jax_format=True)
@@ -81,15 +81,46 @@ class TestJAX(unittest.TestCase):
         jformat = model.jax()
         np.testing.assert_almost_equal(
-            np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
             np.square(np.cos(X[:, 1])),  # Select feature 1
             decimal=3,
         )
     def test_feature_selection_custom_operators(self):
         rstate = np.random.RandomState(0)
         X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
-        cos_approx = lambda x: 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
         y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
         model = PySRRegressor(

 import pandas as pd
 import sympy
+import pysr
+from pysr import PySRRegressor, sympy2jax
 class TestJAX(unittest.TestCase):
     def setUp(self):
         np.random.seed(0)
+        from jax import numpy as jnp
+        self.jnp = jnp
     def test_sympy2jax(self):
         from jax import random
         x, y, z = sympy.symbols("x y z")
         cosx = 1.0 * sympy.cos(x) + y
         key = random.PRNGKey(0)
         X = random.normal(key, (1000, 2))
+        true = 1.0 * self.jnp.cos(X[:, 0]) + X[:, 1]
         f, params = sympy2jax(cosx, [x, y, z])
+        self.assertTrue(self.jnp.all(self.jnp.isclose(f(X, params), true)).item())
     def test_pipeline_pandas(self):
         X = pd.DataFrame(np.random.randn(100, 10))
         y = np.ones(X.shape[0])
         jformat = model.jax()
         np.testing.assert_almost_equal(
+            np.array(jformat["callable"](self.jnp.array(X), jformat["parameters"])),
             np.square(np.cos(X.values[:, 1])),  # Select feature 1
             decimal=3,
         )
     def test_pipeline(self):
         X = np.random.randn(100, 10)
         y = np.ones(X.shape[0])
         model = PySRRegressor(progress=False, max_evals=10000, output_jax_format=True)
         jformat = model.jax()
         np.testing.assert_almost_equal(
+            np.array(jformat["callable"](self.jnp.array(X), jformat["parameters"])),
             np.square(np.cos(X[:, 1])),  # Select feature 1
             decimal=3,
         )
+    def test_avoid_simplification(self):
+        """Export `square(exp(sign(c))) + 1.5 * x1` to JAX and compare with NumPy."""
+        expression = pysr.export_sympy.pysr2sympy(
+            "square(exp(sign(0.44796443))) + 1.5 * x1",
+            feature_names_in=["x1"],
+            extra_sympy_mappings={"square": lambda x: x**2},
+        )
+        x1 = sympy.symbols("x1")
+        f, params = pysr.export_jax.sympy2jax(expression, [x1])
+        rng = np.random.RandomState(0)
+        X = rng.randn(10, 1)
+        actual = np.array(f(self.jnp.array(X), params))
+        # Reference value computed with plain NumPy on the same inputs.
+        expected = np.square(np.exp(np.sign(0.44796443))) + 1.5 * X[:, 0]
+        np.testing.assert_almost_equal(actual, expected, decimal=3)
+    def test_issue_656(self):
+        """Export `exp(1) + x1` to JAX and compare with `np.exp(1) + x1`."""
+        # `sympy` is already imported at module level; no local import needed.
+        x1 = sympy.symbols("x1")
+        E_plus_x1 = sympy.exp(1) + x1
+        f, params = pysr.export_jax.sympy2jax(E_plus_x1, [x1])
+        # A NumPy generator (not a JAX PRNG key), hence the name `rng`.
+        rng = np.random.RandomState(0)
+        X = rng.randn(10, 1)
+        np.testing.assert_almost_equal(
+            np.array(f(self.jnp.array(X), params)),
+            np.exp(1) + X[:, 0],
+            decimal=3,
+        )
     def test_feature_selection_custom_operators(self):
         rstate = np.random.RandomState(0)
         X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
+        def cos_approx(x):
+            return 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
         y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
         model = PySRRegressor(

pysr/test/test_startup.py CHANGED Viewed

@@ -9,8 +9,9 @@ from pathlib import Path
 import numpy as np
-from .. import PySRRegressor
-from ..julia_import import jl_version
 from .params import DEFAULT_NITERATIONS, DEFAULT_POPULATIONS
@@ -118,10 +119,6 @@ class TestStartup(unittest.TestCase):
                 code="import juliacall; import pysr",
                 msg="juliacall module already imported.",
             ),
-            dict(
-                code='import os; os.environ["PYSR_AUTOLOAD_EXTENSIONS"] = "foo"; import pysr',
-                msg="PYSR_AUTOLOAD_EXTENSIONS environment variable is set",
-            ),
         ]
         for warning_test in warning_tests:
             result = subprocess.run(

 import numpy as np
+from pysr import PySRRegressor
+from pysr.julia_import import jl_version
 from .params import DEFAULT_NITERATIONS, DEFAULT_POPULATIONS
                 code="import juliacall; import pysr",
                 msg="juliacall module already imported.",
             ),
         ]
         for warning_test in warning_tests:
             result = subprocess.run(

pysr/test/test_torch.py CHANGED Viewed

@@ -4,7 +4,8 @@ import numpy as np
 import pandas as pd
 import sympy
-from .. import PySRRegressor, sympy2torch
 class TestTorch(unittest.TestCase):
@@ -153,10 +154,43 @@ class TestTorch(unittest.TestCase):
             decimal=3,
         )
     def test_feature_selection_custom_operators(self):
         rstate = np.random.RandomState(0)
         X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
-        cos_approx = lambda x: 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
         y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
         model = PySRRegressor(

 import pandas as pd
 import sympy
+import pysr
+from pysr import PySRRegressor, sympy2torch
 class TestTorch(unittest.TestCase):
             decimal=3,
         )
+    def test_avoid_simplification(self):
+        """SymPy should not simplify the expression without permission."""
+        # Normally the constant sub-expression below would become exp1 and
+        # require its own mapping.
+        expression = pysr.export_sympy.pysr2sympy(
+            "square(exp(sign(0.44796443))) + 1.5 * x1",
+            feature_names_in=["x1"],
+            extra_sympy_mappings={"square": lambda x: x**2},
+        )
+        module = pysr.export_torch.sympy2torch(expression, ["x1"])
+        X = np.random.RandomState(0).randn(10, 1)
+        actual = module(self.torch.tensor(X)).detach().numpy()
+        # Reference value computed with plain NumPy on the same inputs.
+        expected = np.square(np.exp(np.sign(0.44796443))) + 1.5 * X[:, 0]
+        np.testing.assert_almost_equal(actual, expected, decimal=3)
+    def test_issue_656(self):
+        """Should correctly map numeric symbols to floats.
+
+        Exports `exp(1) + x1` to a torch module and compares its output with
+        `np.exp(1) + x1` evaluated by NumPy.
+        """
+        E_plus_x1 = sympy.exp(1) + sympy.symbols("x1")
+        m = pysr.export_torch.sympy2torch(E_plus_x1, ["x1"])
+        # Seeded local generator, as in `test_avoid_simplification`, so the
+        # inputs do not depend on the state of the global NumPy RNG.
+        rng = np.random.RandomState(0)
+        X = rng.randn(10, 1)
+        np.testing.assert_almost_equal(
+            m(self.torch.tensor(X)).detach().numpy(),
+            np.exp(1) + X[:, 0],
+            decimal=3,
+        )
     def test_feature_selection_custom_operators(self):
         rstate = np.random.RandomState(0)
         X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
+        def cos_approx(x):
+            return 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
         y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
         model = PySRRegressor(

pysr/utils.py CHANGED Viewed

@@ -1,10 +1,20 @@
 import os
 import re
-from sklearn.utils.validation import _check_feature_names_in
-def _csv_filename_to_pkl_filename(csv_filename: str) -> str:
     if os.path.splitext(csv_filename)[1] == ".pkl":
         return csv_filename
@@ -53,3 +63,13 @@ def _subscriptify(i: int) -> str:
     For example, 123 -> "₁₂₃".
     """
     return "".join([chr(0x2080 + int(c)) for c in str(i)])

+import difflib
+import inspect
 import os
 import re
+from pathlib import Path
+from typing import Any, List, TypeVar, Union
+from numpy import ndarray
+from sklearn.utils.validation import _check_feature_names_in  # type: ignore
+T = TypeVar("T", bound=Any)
+ArrayLike = Union[ndarray, List[T]]
+PathLike = Union[str, Path]
+def _csv_filename_to_pkl_filename(csv_filename: PathLike) -> PathLike:
     if os.path.splitext(csv_filename)[1] == ".pkl":
         return csv_filename
     For example, 123 -> "₁₂₃".
     """
     return "".join([chr(0x2080 + int(c)) for c in str(i)])
+def _suggest_keywords(cls, k: str) -> List[str]:
+    """Return up to three parameter names of `cls.__init__` that resemble `k`.
+
+    The candidates are the parameters in the signature of `cls.__init__`,
+    excluding `self` and `kwargs`; matching is delegated to
+    `difflib.get_close_matches`. An empty list means nothing was close enough.
+    """
+    excluded = ("self", "kwargs")
+    init_parameters = inspect.signature(cls.__init__).parameters
+    candidates = [name for name in init_parameters if name not in excluded]
+    return difflib.get_close_matches(k, candidates, n=3)

requirements.txt CHANGED Viewed

@@ -1,8 +1,7 @@
 sympy>=1.0.0,<2.0.0
 pandas>=0.21.0,<3.0.0
-numpy>=1.13.0,<2.0.0
 scikit_learn>=1.0.0,<2.0.0
-juliacall==0.9.19
 click>=7.0.0,<9.0.0
 setuptools>=50.0.0
-typing_extensions>=4.0.0,<5.0.0; python_version < "3.8"

 sympy>=1.0.0,<2.0.0
 pandas>=0.21.0,<3.0.0
+numpy>=1.13.0,<3.0.0
 scikit_learn>=1.0.0,<2.0.0
+juliacall==0.9.20
 click>=7.0.0,<9.0.0
 setuptools>=50.0.0