MilesCranmer committed
Commit 11be150 (2 parents: 0f7799e db44938)

Merge branch 'master' into gui

.github/workflows/CI.yml CHANGED
@@ -5,20 +5,14 @@ on:
     branches:
       - '**'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI.yml'
-      - 'setup.py'
+      - '**'
     tags:
       - 'v*.*.*'
   pull_request:
     branches:
-      - '*'
+      - 'master'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI.yml'
-      - 'setup.py'
+      - '**'
 
 jobs:
   test:
@@ -32,12 +26,12 @@ jobs:
     strategy:
       matrix:
         julia-version: ['1']
-        python-version: ['3.11']
+        python-version: ['3.12']
         os: [ubuntu-latest]
         test-id: [main]
         include:
           - julia-version: '1.6'
-            python-version: '3.7'
+            python-version: '3.8'
             os: ubuntu-latest
             test-id: include
           - julia-version: '1'
@@ -48,11 +42,11 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: "Set up Julia"
-        uses: julia-actions/setup-julia@v1
+        uses: julia-actions/setup-julia@v2
        with:
          version: ${{ matrix.julia-version }}
       - name: "Cache Julia"
-        uses: julia-actions/cache@v1
+        uses: julia-actions/cache@v2
        with:
          cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
          cache-packages: false
@@ -90,7 +84,7 @@ jobs:
       - name: "Coveralls"
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          COVERALLS_FLAG_NAME: test-${{ matrix.julia-version }}-${{ matrix.python-version }}
+          COVERALLS_FLAG_NAME: test-${{ matrix.julia-version }}-${{ matrix.python-version }}-${{ matrix.test-id }}
           COVERALLS_PARALLEL: true
         run: coveralls --service=github
 
@@ -99,11 +93,11 @@ jobs:
     strategy:
       matrix:
         os: ['ubuntu-latest']
-        python-version: ['3.11']
+        python-version: ['3.12']
         julia-version: ['1']
         include:
           - os: ubuntu-latest
-            python-version: '3.7'
+            python-version: '3.8'
             julia-version: '1.6'
     steps:
       - uses: actions/checkout@v4
@@ -122,7 +116,7 @@ jobs:
       shell: bash -l {0}
     strategy:
       matrix:
-        python-version: ['3.11']
+        python-version: ['3.12']
         os: ['ubuntu-latest']
 
     steps:
@@ -144,7 +138,7 @@ jobs:
          activate-environment: pysr-test
          environment-file: environment.yml
       - name: "Cache Julia"
-        uses: julia-actions/cache@v1
+        uses: julia-actions/cache@v2
        with:
          cache-name: ${{ matrix.os }}-conda-${{ matrix.python-version }}
          cache-packages: false
@@ -181,8 +175,8 @@ jobs:
     strategy:
       matrix:
         python-version:
-          - '3.11'
-          - '3.7'
+          - '3.12'
+          - '3.8'
         os: ['ubuntu-latest']
 
     steps:
@@ -199,10 +193,10 @@ jobs:
           pip install mypy
       - name: "Install additional dependencies"
         run: python -m pip install jax jaxlib torch
-        if: ${{ matrix.python-version != '3.7' }}
+        if: ${{ matrix.python-version != '3.8' }}
       - name: "Run mypy"
         run: python -m mypy --install-types --non-interactive pysr
-        if: ${{ matrix.python-version != '3.7' }}
+        if: ${{ matrix.python-version != '3.8' }}
       - name: "Run compatible mypy"
         run: python -m mypy --ignore-missing-imports pysr
-        if: ${{ matrix.python-version == '3.7' }}
+        if: ${{ matrix.python-version == '3.8' }}
.github/workflows/CI_Windows.yml CHANGED
@@ -3,22 +3,16 @@ name: Windows
 on:
   push:
     branches:
-      - '**'
+      - 'master'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI_Windows.yml'
-      - 'setup.py'
+      - '**'
     tags:
       - 'v*.*.*'
   pull_request:
     branches:
-      - '*'
+      - 'master'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI_Windows.yml'
-      - 'setup.py'
+      - '**'
 
 jobs:
   test:
@@ -30,17 +24,17 @@ jobs:
     strategy:
       matrix:
         julia-version: ['1']
-        python-version: ['3.11']
+        python-version: ['3.12']
         os: [windows-latest]
 
     steps:
       - uses: actions/checkout@v4
       - name: "Set up Julia"
-        uses: julia-actions/setup-julia@v1
+        uses: julia-actions/setup-julia@v2
        with:
          version: ${{ matrix.julia-version }}
       - name: "Cache Julia"
-        uses: julia-actions/cache@v1
+        uses: julia-actions/cache@v2
        with:
          cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
          cache-packages: false
@@ -52,7 +46,7 @@ jobs:
       - name: "Install PySR"
         run: |
           python -m pip install --upgrade pip
-          pip install pytest nbval
+          pip install pytest nbval "numpy<2.0.0"
           pip install .
           python -c 'import pysr'
       - name: "Run tests"
.github/workflows/CI_docker.yml CHANGED
@@ -3,22 +3,16 @@ name: Docker
 on:
   push:
     branches:
-      - '**'
+      - 'master'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI_docker.yml'
-      - 'setup.py'
-      - 'Dockerfile'
+      - '**'
+    tags:
+      - 'v*.*.*'
   pull_request:
     branches:
-      - '*'
+      - 'master'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI_docker.yml'
-      - 'setup.py'
-      - 'Dockerfile'
+      - '**'
 
 jobs:
   test:
.github/workflows/CI_docker_large_nightly.yml CHANGED
@@ -19,7 +19,7 @@ jobs:
       fail-fast: false
       matrix:
         julia-version: ['1.6', '1']
-        python-version: ['3.7', '3.11']
+        python-version: ['3.8', '3.12']
         os: [ubuntu-latest]
         arch: ['linux/amd64', 'linux/arm64']
 
@@ -27,7 +27,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
+        uses: docker/setup-qemu-action@v3
        with:
          platforms: all
       - name: Build docker
.github/workflows/CI_large_nightly.yml CHANGED
@@ -23,14 +23,14 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        julia-version: ['1.6', '1.8', '1.9']
-        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
+        julia-version: ['1.6', '1.8', '1.10']
+        python-version: ['3.8', '3.10', '3.12']
         os: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
       - uses: actions/checkout@v4
       - name: "Set up Julia"
-        uses: julia-actions/setup-julia@v1
+        uses: julia-actions/setup-julia@v2
        with:
          version: ${{ matrix.julia-version }}
       - name: "Set up Python"
.github/workflows/CI_mac.yml CHANGED
@@ -3,22 +3,16 @@ name: macOS
 on:
   push:
     branches:
-      - '**'
+      - 'master'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI_mac.yml'
-      - 'setup.py'
+      - '**'
     tags:
       - 'v*.*.*'
   pull_request:
     branches:
-      - '*'
+      - 'master'
     paths:
-      - 'test/**'
-      - 'pysr/**'
-      - '.github/workflows/CI_mac.yml'
-      - 'setup.py'
+      - '**'
 
 jobs:
   test:
@@ -30,17 +24,17 @@ jobs:
     strategy:
       matrix:
         julia-version: ['1']
-        python-version: ['3.11']
+        python-version: ['3.12']
         os: [macos-latest]
 
     steps:
       - uses: actions/checkout@v4
       - name: "Set up Julia"
-        uses: julia-actions/setup-julia@v1
+        uses: julia-actions/setup-julia@v2
        with:
          version: ${{ matrix.julia-version }}
       - name: "Cache Julia"
-        uses: julia-actions/cache@v1
+        uses: julia-actions/cache@v2
        with:
          cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
          cache-packages: false
.github/workflows/docker_deploy.yml CHANGED
@@ -18,19 +18,19 @@ jobs:
       matrix:
         os: [ubuntu-latest]
         arch: [linux/amd64]
-        python-version: [3.11.6]
-        julia-version: [1.9.4]
+        python-version: [3.12.3]
+        julia-version: [1.10.3]
     steps:
       - name: Checkout
         uses: actions/checkout@v4
       - name: Login to Docker Hub
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        if: github.event_name != 'pull_request'
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
       - name: Login to GitHub registry
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        if: github.event_name != 'pull_request'
        with:
          registry: ghcr.io
@@ -55,11 +55,11 @@ jobs:
             type=sha
             type=raw,value=latest,enable={{is_default_branch}}
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
+        uses: docker/setup-qemu-action@v3
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
       - name: Build and push
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          context: .
          platforms: ${{ matrix.arch }}
.github/workflows/update_backend.yml CHANGED
@@ -40,7 +40,6 @@ jobs:
       - name: "Create PR if necessary"
         uses: peter-evans/create-pull-request@v6
         with:
-          token: ${{ secrets.REPO_SCOPED_TOKEN }}
           title: "Automated update to backend: v${{ steps.get-latest.outputs.version }}"
           body: |
             This PR was automatically generated by the GitHub Action `.github/workflows/update-backend.yml`
.gitignore CHANGED
@@ -23,3 +23,5 @@ site
 **/*.code-workspace
 **/*.tar.gz
 venv
+requirements-dev.lock
+requirements.lock
.pre-commit-config.yaml CHANGED
@@ -1,7 +1,7 @@
 repos:
   # General linting
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v4.6.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
@@ -9,14 +9,14 @@ repos:
       - id: check-added-large-files
   # General formatting
   - repo: https://github.com/psf/black
-    rev: 23.12.1
+    rev: 24.4.2
     hooks:
       - id: black
       - id: black-jupyter
         exclude: pysr/test/test_nb.ipynb
   # Stripping notebooks
   - repo: https://github.com/kynan/nbstripout
-    rev: 0.6.1
+    rev: 0.7.1
     hooks:
       - id: nbstripout
         exclude: pysr/test/test_nb.ipynb
Dockerfile CHANGED
@@ -1,8 +1,8 @@
 # This builds a dockerfile containing a working copy of PySR
 # with all pre-requisites installed.
 
-ARG JLVERSION=1.10.0
-ARG PYVERSION=3.11.6
+ARG JLVERSION=1.10.4
+ARG PYVERSION=3.12.2
 ARG BASE_IMAGE=bullseye
 
 FROM julia:${JLVERSION}-${BASE_IMAGE} AS jl
README.md CHANGED
@@ -297,7 +297,7 @@ model = PySRRegressor(
     # ^ Higher precision calculations.
     warm_start=True,
     # ^ Start from where left off.
-    bumper=True,
+    turbo=True,
     # ^ Faster evaluation (experimental)
     extra_sympy_mappings={"cos2": lambda x: sympy.cos(x)**2},
     # extra_torch_mappings={sympy.cos: torch.cos},
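
The hunk above swaps `bumper=True` for `turbo=True` as the experimental fast-evaluation flag in the README snippet. As a minimal sketch (the abbreviated constructor below is not part of this commit), the option is passed like any other `PySRRegressor` parameter:

```python
from pysr import PySRRegressor

# Minimal sketch: enable the experimental fast evaluation mode.
# Other options shown in the README are omitted here for brevity.
model = PySRRegressor(
    binary_operators=["+", "*"],
    turbo=True,  # ^ Faster evaluation (experimental)
)
```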
benchmarks/hyperparamopt.py CHANGED
@@ -1,4 +1,5 @@
 """Start a hyperoptimization from a single node"""
+
 import pickle as pkl
 import sys
 
benchmarks/print_best_model.py CHANGED
@@ -1,4 +1,5 @@
 """Print the best model parameters and loss"""
+
 import pickle as pkl
 from pprint import PrettyPrinter
 
docs/examples.md CHANGED
@@ -428,7 +428,7 @@ the evaluation, as we simply evaluated each argument and divided the result) int
 `((2.3554819 + -0.3554746) - (x1 * (x0 * x0)))` and
 `(-1.0000019 - (x2 * x2))`, meaning that our discovered equation is
 equal to:
-$\frac{x_0^2 x_1 - 2.0000073}{x_2^2 - 1.0000019}$, which
+$\frac{x_0^2 x_1 - 2.0000073}{x_2^2 + 1.0000019}$, which
 is nearly the same as the true equation!
 
 ## 10. Dimensional constraints
@@ -520,6 +520,8 @@ a constant `"2.6353e-22[m s⁻²]"`.
 
 Note that this expression has a large dynamic range so may be difficult to find. Consider searching with a larger `niterations` if needed.
 
+Note that you can also search for exclusively dimensionless constants by settings
+`dimensionless_constants_only` to `true`.
 
 ## 11. Additional features
 
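The added doc text refers to the new `dimensionless_constants_only` option (also registered in `pysr/param_groupings.yml` below). A hedged sketch of how it combines with the dimensional-constraint example; the data here is synthetic and only for illustration:

```python
import numpy as np
from pysr import PySRRegressor

X = np.random.randn(100, 3)
y = X[:, 2] * (X[:, 0] + X[:, 1])  # units: kg * m/s

# Reject any fitted constant that would need physical units of its own.
model = PySRRegressor(
    binary_operators=["+", "*"],
    dimensionless_constants_only=True,
)
model.fit(X, y, X_units=["m/s", "m/s", "kg"], y_units="kg*m/s")
```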
docs/generate_papers.py CHANGED
@@ -1,4 +1,5 @@
 """This script generates the papers.md file from the papers.yml file."""
+
 from pathlib import Path
 
 import yaml
environment.yml CHANGED
@@ -2,11 +2,10 @@ name: test
 channels:
   - conda-forge
 dependencies:
-  - python>=3.7
+  - python>=3.8
   - sympy>=1.0.0,<2.0.0
   - pandas>=0.21.0,<3.0.0
   - numpy>=1.13.0,<2.0.0
   - scikit-learn>=1.0.0,<2.0.0
   - pyjuliacall>=0.9.15,<0.10.0
   - click>=7.0.0,<9.0.0
-  - typing_extensions>=4.0.0,<5.0.0
examples/pysr_demo.ipynb CHANGED
@@ -396,7 +396,7 @@
    "id": "wbWHyOjl2_kX"
   },
   "source": [
-    "Since `quart` is arguably more complex than the other operators, you can also give it a different complexity, using, e.g., `complexity_of_operators={\"quart\": 2}` to give it a complexity of 2 (instead of the default 2). You can also define custom complexities for variables and constants (`complexity_of_variables` and `complexity_of_constants`, respectively - both take a single number).\n",
+    "Since `quart` is arguably more complex than the other operators, you can also give it a different complexity, using, e.g., `complexity_of_operators={\"quart\": 2}` to give it a complexity of 2 (instead of the default 1). You can also define custom complexities for variables and constants (`complexity_of_variables` and `complexity_of_constants`, respectively - both take a single number).\n",
     "\n",
     "\n",
     "One can also add a binary operator, with, e.g., `\"myoperator(x, y) = x^2 * y\"`. All Julia operators that work on scalar 32-bit floating point values are available.\n",
pyproject.toml CHANGED
@@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "pysr"
-version = "0.18.1"
+version = "0.19.0"
 authors = [
     {name = "Miles Cranmer", email = "[email protected]"},
 ]
 description = "Simple and efficient symbolic regression"
 readme = {file = "README.md", content-type = "text/markdown"}
 license = {file = "LICENSE"}
-requires-python = ">=3.7"
+requires-python = ">=3.8"
 classifiers = [
     "Programming Language :: Python :: 3",
     "Operating System :: OS Independent",
@@ -29,3 +29,17 @@ dependencies = {file = "requirements.txt"}
 
 [tool.isort]
 profile = "black"
+
+[tool.rye]
+dev-dependencies = [
+    "pre-commit>=3.7.0",
+    "ipython>=8.23.0",
+    "ipykernel>=6.29.4",
+    "mypy>=1.10.0",
+    "jax[cpu]>=0.4.26",
+    "torch>=2.3.0",
+    "pandas-stubs>=2.2.1.240316",
+    "types-pytz>=2024.1.0.20240417",
+    "types-openpyxl>=3.1.0.20240428",
+    "coverage>=7.5.3",
+]
pysr/denoising.py CHANGED
@@ -1,8 +1,17 @@
 """Functions for denoising data during preprocessing."""
+
+from typing import Optional, Tuple, cast
+
 import numpy as np
+from numpy import ndarray
 
 
-def denoise(X, y, Xresampled=None, random_state=None):
+def denoise(
+    X: ndarray,
+    y: ndarray,
+    Xresampled: Optional[ndarray] = None,
+    random_state: Optional[np.random.RandomState] = None,
+) -> Tuple[ndarray, ndarray]:
     """Denoise the dataset using a Gaussian process."""
     from sklearn.gaussian_process import GaussianProcessRegressor
     from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
@@ -14,12 +23,17 @@ def denoise(X, y, Xresampled=None, random_state=None):
     gpr.fit(X, y)
 
     if Xresampled is not None:
-        return Xresampled, gpr.predict(Xresampled)
+        return Xresampled, cast(ndarray, gpr.predict(Xresampled))
 
-    return X, gpr.predict(X)
+    return X, cast(ndarray, gpr.predict(X))
 
 
-def multi_denoise(X, y, Xresampled=None, random_state=None):
+def multi_denoise(
+    X: ndarray,
+    y: ndarray,
+    Xresampled: Optional[ndarray] = None,
+    random_state: Optional[np.random.RandomState] = None,
+):
     """Perform `denoise` along each column of `y` independently."""
     y = np.stack(
         [
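
For reference, a usage sketch of the newly annotated `denoise` (assuming an installed PySR environment and synthetic data):

```python
import numpy as np
from pysr.denoising import denoise

rng = np.random.RandomState(0)
X = rng.uniform(-1, 1, (200, 2))
y = np.cos(2 * X[:, 0]) + rng.normal(0.0, 0.1, 200)  # noisy target

# Returns the inputs plus GP-smoothed targets (the Xresampled=None branch).
X_out, y_smooth = denoise(X, y, random_state=rng)
```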
pysr/deprecated.py CHANGED
@@ -1,4 +1,5 @@
 """Various functions to deprecate features."""
+
 import warnings
 
 from .julia_import import jl
pysr/export_jax.py CHANGED
@@ -1,3 +1,4 @@
+import numpy as np  # noqa: F401
 import sympy
 
 # Special since need to reduce arguments.
@@ -55,7 +56,9 @@ def sympy2jaxtext(expr, parameters, symbols_in, extra_jax_mappings=None):
     if issubclass(expr.func, sympy.Float):
         parameters.append(float(expr))
         return f"parameters[{len(parameters) - 1}]"
-    elif issubclass(expr.func, sympy.Rational):
+    elif issubclass(expr.func, sympy.Rational) or issubclass(
+        expr.func, sympy.NumberSymbol
+    ):
         return f"{float(expr)}"
     elif issubclass(expr.func, sympy.Integer):
         return f"{int(expr)}"
pysr/export_latex.py CHANGED
@@ -1,4 +1,5 @@
 """Functions to help export PySR equations to LaTeX."""
+
 from typing import List, Optional, Tuple
 
 import pandas as pd
@@ -152,3 +153,15 @@ def sympy2multilatextable(
     ]
 
     return "\n\n".join(latex_tables)
+
+
+def with_preamble(table_string: str) -> str:
+    preamble_string = [
+        r"\usepackage{breqn}",
+        r"\usepackage{booktabs}",
+        "",
+        "...",
+        "",
+        table_string,
+    ]
+    return "\n".join(preamble_string)
pysr/export_numpy.py CHANGED
@@ -1,9 +1,12 @@
 """Code for exporting discovered expressions to numpy"""
+
 import warnings
+from typing import List, Union
 
 import numpy as np
 import pandas as pd
-from sympy import lambdify
+from numpy.typing import NDArray
+from sympy import Expr, Symbol, lambdify
 
 
 def sympy2numpy(eqn, sympy_symbols, *, selection=None):
@@ -13,6 +16,10 @@ def sympy2numpy(eqn, sympy_symbols, *, selection=None):
 class CallableEquation:
     """Simple wrapper for numpy lambda functions built with sympy"""
 
+    _sympy: Expr
+    _sympy_symbols: List[Symbol]
+    _selection: Union[NDArray[np.bool_], None]
+
     def __init__(self, eqn, sympy_symbols, selection=None):
         self._sympy = eqn
         self._sympy_symbols = sympy_symbols
@@ -28,8 +35,9 @@ class CallableEquation:
             return self._lambda(
                 **{k: X[k].values for k in map(str, self._sympy_symbols)}
             ) * np.ones(expected_shape)
+
         if self._selection is not None:
-            if X.shape[1] != len(self._selection):
+            if X.shape[1] != self._selection.sum():
                 warnings.warn(
                     "`X` should be of shape (n_samples, len(self._selection)). "
                     "Automatically filtering `X` to selection. "
@@ -37,6 +45,7 @@ class CallableEquation:
                     "this may lead to incorrect predictions and other errors."
                 )
                 X = X[:, self._selection]
+
         return self._lambda(*X.T) * np.ones(expected_shape)
 
     @property
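
The shape check above now compares against `self._selection.sum()` rather than `len(self._selection)` because `_selection` is a boolean mask over all input features, not a list of indices. A small illustration of the difference:

```python
import numpy as np

selection = np.array([True, False, True, False])  # boolean feature mask
len(selection)   # 4 -- total number of features
selection.sum()  # 2 -- number of *selected* features, which filtered X must match
```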
pysr/export_sympy.py CHANGED
@@ -1,9 +1,12 @@
 """Define utilities to export to sympy"""
+
 from typing import Callable, Dict, List, Optional
 
 import sympy
 from sympy import sympify
 
+from .utils import ArrayLike
+
 sympy_mappings = {
     "div": lambda x, y: x / y,
     "mult": lambda x, y: x * y,
@@ -29,8 +32,8 @@ sympy_mappings = {
     "acosh": lambda x: sympy.acosh(x),
     "acosh_abs": lambda x: sympy.acosh(abs(x) + 1),
     "asinh": sympy.asinh,
-    "atanh": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - 1),
-    "atanh_clip": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - 1),
+    "atanh": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - sympy.S(1)),
+    "atanh_clip": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - sympy.S(1)),
     "abs": abs,
     "mod": sympy.Mod,
     "erf": sympy.erf,
@@ -50,6 +53,7 @@ sympy_mappings = {
     "round": lambda x: sympy.ceiling(x - 0.5),
     "max": lambda x, y: sympy.Piecewise((y, x < y), (x, True)),
     "min": lambda x, y: sympy.Piecewise((x, x < y), (y, True)),
+    "greater": lambda x, y: sympy.Piecewise((1.0, x > y), (0.0, True)),
     "cond": lambda x, y: sympy.Piecewise((y, x > 0), (0.0, True)),
     "logical_or": lambda x, y: sympy.Piecewise((1.0, (x > 0) | (y > 0)), (0.0, True)),
     "logical_and": lambda x, y: sympy.Piecewise((1.0, (x > 0) & (y > 0)), (0.0, True)),
@@ -58,13 +62,13 @@ sympy_mappings = {
 
 
 def create_sympy_symbols_map(
-    feature_names_in: List[str],
+    feature_names_in: ArrayLike[str],
 ) -> Dict[str, sympy.Symbol]:
     return {variable: sympy.Symbol(variable) for variable in feature_names_in}
 
 
 def create_sympy_symbols(
-    feature_names_in: List[str],
+    feature_names_in: ArrayLike[str],
 ) -> List[sympy.Symbol]:
     return [sympy.Symbol(variable) for variable in feature_names_in]
 
@@ -72,7 +76,7 @@ def create_sympy_symbols(
 def pysr2sympy(
     equation: str,
     *,
-    feature_names_in: Optional[List[str]] = None,
+    feature_names_in: Optional[ArrayLike[str]] = None,
     extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
 ):
     if feature_names_in is None:
@@ -83,7 +87,12 @@ def pysr2sympy(
         **sympy_mappings,
     }
 
-    return sympify(equation, locals=local_sympy_mappings)
+    try:
+        return sympify(equation, locals=local_sympy_mappings, evaluate=False)
+    except TypeError as e:
+        if "got an unexpected keyword argument 'evaluate'" in str(e):
+            return sympify(equation, locals=local_sympy_mappings)
+        raise TypeError(f"Error processing equation '{equation}'") from e
 
 
 def assert_valid_sympy_symbol(var_name: str) -> None:
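
The new `pysr2sympy` body parses with `evaluate=False`, which keeps the equation string's literal structure instead of letting SymPy fold constants during parsing, and falls back gracefully on SymPy versions whose `sympify` rejects the keyword. A small illustration of the difference:

```python
from sympy import sympify

lazy = sympify("x0 + 1 + 2", evaluate=False)  # stays as x0 + 1 + 2
eager = sympify("x0 + 1 + 2")                 # simplified to x0 + 3
print(lazy, "|", eager)
```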
pysr/export_torch.py CHANGED
@@ -1,11 +1,9 @@
-#####
-# From https://github.com/patrick-kidger/sympytorch
-# Copied here to allow PySR-specific tweaks
-#####
+# Fork of https://github.com/patrick-kidger/sympytorch
 
 import collections as co
 import functools as ft
 
+import numpy as np  # noqa: F401
 import sympy
 
@@ -84,7 +82,7 @@ def _initialize_torch():
         }
 
         class _Node(torch.nn.Module):
-            """SympyTorch code from https://github.com/patrick-kidger/sympytorch"""
+            """Forked from https://github.com/patrick-kidger/sympytorch"""
 
             def __init__(self, *, expr, _memodict, _func_lookup, **kwargs):
                 super().__init__(**kwargs)
@@ -116,6 +114,11 @@ def _initialize_torch():
                     self._value = int(expr)
                     self._torch_func = lambda: self._value
                     self._args = ()
+                elif issubclass(expr.func, sympy.NumberSymbol):
+                    # Can get here from exp(1) or exact pi
+                    self._value = float(expr)
+                    self._torch_func = lambda: self._value
+                    self._args = ()
                 elif issubclass(expr.func, sympy.Symbol):
                     self._name = expr.name
                     self._torch_func = lambda value: value
@@ -156,7 +159,7 @@ def _initialize_torch():
                 return self._torch_func(*args)
 
         class _SingleSymPyModule(torch.nn.Module):
-            """SympyTorch code from https://github.com/patrick-kidger/sympytorch"""
+            """Forked from https://github.com/patrick-kidger/sympytorch"""
 
             def __init__(
                 self, expression, symbols_in, selection=None, extra_funcs=None, **kwargs
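
The added `sympy.NumberSymbol` branch catches SymPy's exact named constants (e.g. `pi`, or `E` arising from `exp(1)`), which previously matched none of the numeric cases; they are embedded in the torch graph as plain floats. For illustration:

```python
import sympy

# Exact constants are NumberSymbol instances; float() gives the value
# that the new branch bakes into the torch module.
assert isinstance(sympy.pi, sympy.NumberSymbol)
assert isinstance(sympy.E, sympy.NumberSymbol)
print(float(sympy.pi), float(sympy.E))
```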
pysr/feature_selection.py CHANGED
@@ -1,8 +1,20 @@
 """Functions for doing feature selection during preprocessing."""
+
+from typing import Optional, cast
+
 import numpy as np
+from numpy import ndarray
+from numpy.typing import NDArray
+
+from .utils import ArrayLike
 
 
-def run_feature_selection(X, y, select_k_features, random_state=None):
+def run_feature_selection(
+    X: ndarray,
+    y: ndarray,
+    select_k_features: int,
+    random_state: Optional[np.random.RandomState] = None,
+) -> NDArray[np.bool_]:
     """
     Find most important features.
 
@@ -20,11 +32,16 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
     selector = SelectFromModel(
         clf, threshold=-np.inf, max_features=select_k_features, prefit=True
     )
-    return selector.get_support(indices=True)
+    return cast(NDArray[np.bool_], selector.get_support(indices=False))
 
 
 # Function has not been removed only due to usage in module tests
-def _handle_feature_selection(X, select_k_features, y, variable_names):
+def _handle_feature_selection(
+    X: ndarray,
+    select_k_features: Optional[int],
+    y: ndarray,
+    variable_names: ArrayLike[str],
+):
     if select_k_features is not None:
         selection = run_feature_selection(X, y, select_k_features)
         print(f"Using features {[variable_names[i] for i in selection]}")
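
`run_feature_selection` now returns a boolean mask (`get_support(indices=False)`) rather than integer indices. Both forms select the same columns from a feature matrix, which is why downstream indexing like `X[:, selection]` keeps working; a quick check:

```python
import numpy as np

X = np.arange(20).reshape(5, 4)
mask = np.array([True, False, True, False])  # new-style boolean mask
indices = np.where(mask)[0]                  # old-style integer indices

assert np.array_equal(X[:, mask], X[:, indices])
```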
pysr/julia_helpers.py CHANGED
@@ -1,11 +1,16 @@
 """Functions for initializing the Julia environment and installing deps."""
 
+from typing import Any, Callable, Union, cast
+
 import numpy as np
 from juliacall import convert as jl_convert  # type: ignore
+from numpy.typing import NDArray
 
 from .deprecated import init_julia, install
 from .julia_import import jl
 
+jl_convert = cast(Callable[[Any, Any], Any], jl_convert)
+
 jl.seval("using Serialization: Serialization")
 jl.seval("using PythonCall: PythonCall")
 
@@ -22,24 +27,31 @@ def _escape_filename(filename):
     return str_repr
 
 
-def _load_cluster_manager(cluster_manager):
+def _load_cluster_manager(cluster_manager: str):
     jl.seval(f"using ClusterManagers: addprocs_{cluster_manager}")
     return jl.seval(f"addprocs_{cluster_manager}")
 
 
-def jl_array(x):
+def jl_array(x, dtype=None):
     if x is None:
         return None
-    return jl_convert(jl.Array, x)
+    elif dtype is None:
+        return jl_convert(jl.Array, x)
+    else:
+        return jl_convert(jl.Array[dtype], x)
+
+
+def jl_is_function(f) -> bool:
+    return cast(bool, jl.seval("op -> op isa Function")(f))
 
 
-def jl_serialize(obj):
+def jl_serialize(obj: Any) -> NDArray[np.uint8]:
     buf = jl.IOBuffer()
     Serialization.serialize(buf, obj)
     return np.array(jl.take_b(buf))
 
 
-def jl_deserialize(s):
+def jl_deserialize(s: Union[NDArray[np.uint8], None]):
     if s is None:
         return s
     buf = jl.IOBuffer()
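
`jl_array` gains an optional `dtype`, which requests a typed Julia `Array` via `jl_convert(jl.Array[dtype], x)`. A usage sketch (assuming the imports below resolve in an installed PySR environment):

```python
import numpy as np
from pysr.julia_helpers import jl_array
from pysr.julia_import import jl

x = jl_array(np.array([1.0, 2.0]))                      # element type inferred
x32 = jl_array(np.array([1.0, 2.0]), dtype=jl.Float32)  # force Julia Float32
```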
pysr/julia_import.py CHANGED
@@ -1,6 +1,8 @@
 import os
 import sys
 import warnings
+from types import ModuleType
+from typing import cast
 
 # Check if JuliaCall is already loaded, and if so, warn the user
 # about the relevant environment variables. If not loaded,
@@ -35,31 +37,17 @@ else:
         os.environ[k] = os.environ.get(k, default)
 
 
-from juliacall import Main as jl  # type: ignore
-
-jl_version = (jl.VERSION.major, jl.VERSION.minor, jl.VERSION.patch)
-
-# Next, automatically load the juliacall extension if we're in a Jupyter notebook
-autoload_extensions = os.environ.get("PYSR_AUTOLOAD_EXTENSIONS", "yes")
-if autoload_extensions in {"yes", ""} and jl_version >= (1, 9, 0):
-    try:
-        get_ipython = sys.modules["IPython"].get_ipython
-
-        if "IPKernelApp" not in get_ipython().config:
-            raise ImportError("console")
-
-        print(
-            "Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable."
-        )
-
-        # TODO: Turn this off if juliacall does this automatically
-        get_ipython().run_line_magic("load_ext", "juliacall")
-    except Exception:
-        pass
-elif autoload_extensions not in {"no", "yes", ""}:
-    warnings.warn(
-        "PYSR_AUTOLOAD_EXTENSIONS environment variable is set to something other than 'yes' or 'no' or ''."
-    )
+autoload_extensions = os.environ.get("PYSR_AUTOLOAD_EXTENSIONS")
+if autoload_extensions is not None:
+    # Deprecated; so just pass to juliacall
+    os.environ["PYTHON_JULIACALL_AUTOLOAD_IPYTHON_EXTENSION"] = autoload_extensions
+
+from juliacall import Main as jl  # type: ignore
+
+jl = cast(ModuleType, jl)
+
+jl_version = (jl.VERSION.major, jl.VERSION.minor, jl.VERSION.patch)
 
 jl.seval("using SymbolicRegression")
 SymbolicRegression = jl.SymbolicRegression
pysr/juliapkg.json CHANGED
@@ -3,7 +3,7 @@
   "packages": {
     "SymbolicRegression": {
       "uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
-      "version": "=0.24.1"
+      "version": "=0.24.5"
     },
     "Serialization": {
       "uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",
pysr/param_groupings.yml CHANGED
@@ -14,6 +14,7 @@
   - loss_function
   - model_selection
   - dimensional_constraint_penalty
+  - dimensionless_constants_only
 - Working with Complexities:
   - parsimony
   - constraints
pysr/sklearn_monkeypatch.py CHANGED
@@ -3,8 +3,7 @@
 from sklearn.utils import validation
 
 
-def _ensure_no_complex_data(*args, **kwargs):
-    ...
+def _ensure_no_complex_data(*args, **kwargs): ...
 
 
 try:
pysr/sr.py CHANGED
@@ -8,27 +8,31 @@ import shutil
8
  import sys
9
  import tempfile
10
  import warnings
 
11
  from datetime import datetime
12
  from io import StringIO
13
  from multiprocessing import cpu_count
14
  from pathlib import Path
15
- from typing import Callable, Dict, List, Optional, Tuple, Union
16
-
17
- if sys.version_info >= (3, 8):
18
- from typing import Literal
19
- else:
20
- from typing_extensions import Literal
21
 
22
  import numpy as np
23
  import pandas as pd
 
 
24
  from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
25
  from sklearn.utils import check_array, check_consistent_length, check_random_state
26
- from sklearn.utils.validation import _check_feature_names_in, check_is_fitted
 
27
 
28
  from .denoising import denoise, multi_denoise
29
  from .deprecated import DEPRECATED_KWARGS
30
  from .export_jax import sympy2jax
31
- from .export_latex import sympy2latex, sympy2latextable, sympy2multilatextable
 
 
 
 
 
32
  from .export_numpy import sympy2numpy
33
  from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2sympy
34
  from .export_torch import sympy2torch
@@ -40,17 +44,21 @@ from .julia_helpers import (
40
  _load_cluster_manager,
41
  jl_array,
42
  jl_deserialize,
 
43
  jl_serialize,
44
  )
45
  from .julia_import import SymbolicRegression, jl
46
  from .utils import (
 
 
47
  _csv_filename_to_pkl_filename,
48
  _preprocess_julia_floats,
49
  _safe_check_feature_names_in,
50
  _subscriptify,
 
51
  )
52
 
53
- already_ran = False
54
 
55
 
56
  def _process_constraints(binary_operators, unary_operators, constraints):
@@ -113,7 +121,7 @@ def _maybe_create_inline_operators(
113
  "and underscores are allowed."
114
  )
115
  if (extra_sympy_mappings is None) or (
116
- not function_name in extra_sympy_mappings
117
  ):
118
  raise ValueError(
119
  f"Custom function {function_name} is not defined in `extra_sympy_mappings`. "
@@ -130,6 +138,7 @@ def _check_assertions(
130
  X,
131
  use_custom_variable_names,
132
  variable_names,
 
133
  weights,
134
  y,
135
  X_units,
@@ -154,6 +163,13 @@ def _check_assertions(
154
  "and underscores are allowed."
155
  )
156
  assert_valid_sympy_symbol(var_name)
 
 
 
 
 
 
 
157
  if X_units is not None and len(X_units) != X.shape[1]:
158
  raise ValueError(
159
  "The number of units in `X_units` must equal the number of features in `X`."
@@ -178,6 +194,21 @@ def _check_assertions(
178
  VALID_OPTIMIZER_ALGORITHMS = ["BFGS", "NelderMead"]
179
 
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
182
  """
183
  High-performance symbolic regression algorithm.
@@ -309,7 +340,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
309
  `idx` argument to the function, which is `nothing`
310
  for non-batched, and a 1D array of indices for batched.
311
  Default is `None`.
312
- complexity_of_operators : dict[str, float]
313
  If you would like to use a complexity other than 1 for an
314
  operator, specify the complexity here. For example,
315
  `{"sin": 2, "+": 1}` would give a complexity of 2 for each use
@@ -318,16 +349,22 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
318
  numbers for a complexity, and the total complexity of a tree
319
  will be rounded to the nearest integer after computing.
320
  Default is `None`.
321
- complexity_of_constants : float
322
  Complexity of constants. Default is `1`.
323
- complexity_of_variables : float
324
- Complexity of variables. Default is `1`.
 
 
 
325
  parsimony : float
326
  Multiplicative factor for how much to punish complexity.
327
  Default is `0.0032`.
328
  dimensional_constraint_penalty : float
329
  Additive penalty for if dimensional analysis of an expression fails.
330
  By default, this is `1000.0`.
 
 
 
331
  use_frequency : bool
332
  Whether to measure the frequency of complexities, and use that
333
  instead of parsimony to explore equation space. Will naturally
@@ -603,22 +640,17 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
603
  Units of each variable in the training dataset, `y`.
604
  nout_ : int
605
  Number of output dimensions.
606
- selection_mask_ : list[int] of length `select_k_features`
607
- List of indices for input features that are selected when
608
- `select_k_features` is set.
609
  tempdir_ : Path
610
  Path to the temporary equations directory.
611
- equation_file_ : str
612
  Output equation file name produced by the julia backend.
613
  julia_state_stream_ : ndarray
614
  The serialized state for the julia SymbolicRegression.jl backend (after fitting),
615
  stored as an array of uint8, produced by Julia's Serialization.serialize function.
616
- julia_state_
617
- The deserialized state.
618
  julia_options_stream_ : ndarray
619
  The serialized julia options, stored as an array of uint8,
620
- julia_options_
621
- The deserialized julia options.
622
  equation_file_contents_ : list[pandas.DataFrame]
623
  Contents of the equation file output by the Julia backend.
624
  show_pickle_warnings_ : bool
@@ -665,6 +697,22 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
665
  ```
666
  """
667
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
  def __init__(
669
  self,
670
  model_selection: Literal["best", "accuracy", "score"] = "best",
@@ -685,9 +733,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
685
  loss_function: Optional[str] = None,
686
  complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
687
  complexity_of_constants: Union[int, float] = 1,
688
- complexity_of_variables: Union[int, float] = 1,
689
  parsimony: float = 0.0032,
690
  dimensional_constraint_penalty: Optional[float] = None,
 
691
  use_frequency: bool = True,
692
  use_frequency_in_tournament: bool = True,
693
  adaptive_parsimony_scaling: float = 20.0,
@@ -783,6 +832,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
783
  self.complexity_of_variables = complexity_of_variables
784
  self.parsimony = parsimony
785
  self.dimensional_constraint_penalty = dimensional_constraint_penalty
 
786
  self.use_frequency = use_frequency
787
  self.use_frequency_in_tournament = use_frequency_in_tournament
788
  self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
@@ -863,15 +913,15 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
863
  updated_kwarg_name = DEPRECATED_KWARGS[k]
864
  setattr(self, updated_kwarg_name, v)
865
  warnings.warn(
866
- f"{k} has been renamed to {updated_kwarg_name} in PySRRegressor. "
867
  "Please use that instead.",
868
  FutureWarning,
869
  )
870
  # Handle kwargs that have been moved to the fit method
871
  elif k in ["weights", "variable_names", "Xresampled"]:
872
  warnings.warn(
873
- f"{k} is a data dependant parameter so should be passed when fit is called. "
874
- f"Ignoring parameter; please pass {k} during the call to fit instead.",
875
  FutureWarning,
876
  )
877
  elif k == "julia_project":
@@ -888,21 +938,25 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
888
  FutureWarning,
889
  )
890
  else:
891
- raise TypeError(
892
- f"{k} is not a valid keyword argument for PySRRegressor."
 
893
  )
 
 
 
894
 
895
  @classmethod
896
  def from_file(
897
  cls,
898
- equation_file,
899
  *,
900
- binary_operators=None,
901
- unary_operators=None,
902
- n_features_in=None,
903
- feature_names_in=None,
904
- selection_mask=None,
905
- nout=1,
906
  verbosity=1,
907
  **pysr_kwargs,
908
  ):
@@ -911,7 +965,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
911
 
912
  Parameters
913
  ----------
914
- equation_file : str
915
  Path to a pickle file containing a saved model, or a csv file
916
  containing equations.
917
  binary_operators : list[str]
@@ -926,8 +980,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
926
  feature_names_in : list[str]
927
  Names of the features passed to the model.
928
  Not needed if loading from a pickle file.
929
- selection_mask : list[bool]
930
- If using select_k_features, you must pass `model.selection_mask_` here.
931
  Not needed if loading from a pickle file.
932
  nout : int
933
  Number of outputs of the model.
@@ -983,7 +1037,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
983
 
984
  # TODO: copy .bkup file if exists.
985
  model = cls(
986
- equation_file=equation_file,
987
  binary_operators=binary_operators,
988
  unary_operators=unary_operators,
989
  **pysr_kwargs,
@@ -1003,7 +1057,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1003
  model.display_feature_names_in_ = feature_names_in
1004
 
1005
  if selection_mask is None:
1006
- model.selection_mask_ = np.ones(n_features_in, dtype=bool)
1007
  else:
1008
  model.selection_mask_ = selection_mask
1009
 
@@ -1030,7 +1084,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1030
  all_equations = equations
1031
 
1032
  for i, equations in enumerate(all_equations):
1033
- selected = ["" for _ in range(len(equations))]
1034
  chosen_row = idx_model_selection(equations, self.model_selection)
1035
  selected[chosen_row] = ">>>>"
1036
  repr_equations = pd.DataFrame(
@@ -1063,15 +1117,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1063
  Handle pickle serialization for PySRRegressor.
1064
 
1065
  The Scikit-learn standard requires estimators to be serializable via
1066
- `pickle.dumps()`. However, `PyCall.jlwrap` does not support pickle
1067
- serialization.
1068
-
1069
- Thus, for `PySRRegressor` to support pickle serialization, the
1070
- `julia_state_stream_` attribute must be hidden from pickle. This will
1071
- prevent the `warm_start` of any model that is loaded via `pickle.loads()`,
1072
- but does allow all other attributes of a fitted `PySRRegressor` estimator
1073
- to be serialized. Note: Jax and Torch format equations are also removed
1074
- from the pickled instance.
1075
  """
1076
  state = self.__dict__
1077
  show_pickle_warning = not (
@@ -1137,10 +1184,12 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1137
 
1138
  @property
1139
  def julia_options_(self):
 
1140
  return jl_deserialize(self.julia_options_stream_)
1141
 
1142
  @property
1143
  def julia_state_(self):
 
1144
  return jl_deserialize(self.julia_state_stream_)
1145
 
1146
  @property
@@ -1153,7 +1202,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1153
  )
1154
  return self.julia_state_
1155
 
1156
- def get_best(self, index=None):
1157
  """
1158
  Get best equation using `model_selection`.
1159
 
@@ -1176,8 +1225,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1176
  Raised when an invalid model selection strategy is provided.
1177
  """
1178
  check_is_fitted(self, attributes=["equations_"])
1179
- if self.equations_ is None:
1180
- raise ValueError("No equations have been generated yet.")
1181
 
1182
  if index is not None:
1183
  if isinstance(self.equations_, list):
@@ -1185,16 +1232,21 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1185
  index, list
1186
  ), "With multiple output features, index must be a list."
1187
  return [eq.iloc[i] for eq, i in zip(self.equations_, index)]
1188
- return self.equations_.iloc[index]
 
 
1189
 
1190
  if isinstance(self.equations_, list):
1191
  return [
1192
- eq.iloc[idx_model_selection(eq, self.model_selection)]
1193
  for eq in self.equations_
1194
  ]
1195
- return self.equations_.iloc[
1196
- idx_model_selection(self.equations_, self.model_selection)
1197
- ]
 
 
 
1198
 
1199
  def _setup_equation_file(self):
1200
  """
@@ -1219,7 +1271,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1219
  self.equation_file_ = self.equation_file
1220
  self.equation_file_contents_ = None
1221
 
1222
- def _validate_and_set_init_params(self):
1223
  """
1224
  Ensure parameters passed at initialization are valid.
1225
 
@@ -1277,59 +1329,57 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1277
  f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}"
1278
  )
1279
 
1280
- progress = self.progress
1281
- # 'Mutable' parameter validation
1282
- # (Params and their default values, if None is given:)
1283
- default_param_mapping = {
1284
- "binary_operators": "+ * - /".split(" "),
1285
- "unary_operators": [],
1286
- "maxdepth": self.maxsize,
1287
- "constraints": {},
1288
- "multithreading": self.procs != 0 and self.cluster_manager is None,
1289
- "batch_size": 1,
1290
- "update_verbosity": int(self.verbosity),
1291
- "progress": progress,
1292
- }
1293
- packed_modified_params = {}
1294
- for parameter, default_value in default_param_mapping.items():
1295
- parameter_value = getattr(self, parameter)
1296
- if parameter_value is None:
1297
- parameter_value = default_value
1298
  else:
1299
- # Special cases such as when binary_operators is a string
1300
- if parameter in ["binary_operators", "unary_operators"] and isinstance(
1301
- parameter_value, str
1302
- ):
1303
- parameter_value = [parameter_value]
1304
- elif parameter == "batch_size" and parameter_value < 1:
1305
- warnings.warn(
1306
- "Given `batch_size` must be greater than or equal to one. "
1307
- "`batch_size` has been increased to equal one."
1308
- )
1309
- parameter_value = 1
1310
- elif (
1311
- parameter == "progress"
1312
- and parameter_value
1313
- and "buffer" not in sys.stdout.__dir__()
1314
- ):
1315
- warnings.warn(
1316
- "Note: it looks like you are running in Jupyter. "
1317
- "The progress bar will be turned off."
1318
- )
1319
- parameter_value = False
1320
- packed_modified_params[parameter] = parameter_value
1321
 
1322
  assert (
1323
- len(packed_modified_params["binary_operators"])
1324
- + len(packed_modified_params["unary_operators"])
1325
- > 0
1326
- )
1327
 
1328
- return packed_modified_params
1329
 
1330
  def _validate_and_set_fit_params(
1331
- self, X, y, Xresampled, weights, variable_names, X_units, y_units
1332
- ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1333
  """
1334
  Validate the parameters passed to the :term`fit` method.
1335
 
@@ -1349,12 +1399,14 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
            Weight array of the same shape as `y`.
            Each element is how to weight the mean-square-error loss
            for that particular element of y.
-       variable_names : list[str] of length n_features
-           Names of each variable in the training dataset, `X`.
        X_units : list[str] of length n_features
-           Units of each variable in the training dataset, `X`.
        y_units : str | list[str] of length n_out
-           Units of each variable in the training dataset, `y`.

        Returns
        -------
@@ -1398,6 +1450,22 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                "Please use valid names instead."
            )

        # Data validation and feature name fetching via sklearn
        # This method sets the n_features_in_ attribute
        if Xresampled is not None:
@@ -1405,7 +1473,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        if weights is not None:
            weights = check_array(weights, ensure_2d=False)
            check_consistent_length(weights, y)
-       X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
        self.feature_names_in_ = _safe_check_feature_names_in(
            self, variable_names, generate_names=False
        )
@@ -1415,10 +1483,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
            self.display_feature_names_in_ = np.array(
                [f"x{_subscriptify(i)}" for i in range(X.shape[1])]
            )
        else:
            self.display_feature_names_in_ = self.feature_names_in_
-
-       variable_names = self.feature_names_in_

        # Handle multioutput data
        if len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1):
@@ -1428,13 +1496,39 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        else:
            raise NotImplementedError("y shape not supported!")

        self.X_units_ = copy.deepcopy(X_units)
        self.y_units_ = copy.deepcopy(y_units)

-       return X, y, Xresampled, weights, variable_names, X_units, y_units

    def _pre_transform_training_data(
-       self, X, y, Xresampled, variable_names, X_units, y_units, random_state
    ):
        """
        Transform the training data before fitting the symbolic regressor.
@@ -1443,17 +1537,19 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):

        Parameters
        ----------
-       X : ndarray | pandas.DataFrame
            Training data of shape (n_samples, n_features).
-       y : ndarray | pandas.DataFrame
            Target values of shape (n_samples,) or (n_samples, n_targets).
            Will be cast to X's dtype if necessary.
-       Xresampled : ndarray | pandas.DataFrame
            Resampled training data, of shape `(n_resampled, n_features)`,
            used for denoising.
        variable_names : list[str]
            Names of each variable in the training dataset, `X`.
            Of length `n_features`.
        X_units : list[str]
            Units of each variable in the training dataset, `X`.
        y_units : str | list[str]
@@ -1486,24 +1582,43 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        """
        # Feature selection transformation
        if self.select_k_features:
-           self.selection_mask_ = run_feature_selection(
                X, y, self.select_k_features, random_state=random_state
            )
-           X = X[:, self.selection_mask_]

            if Xresampled is not None:
-               Xresampled = Xresampled[:, self.selection_mask_]

            # Reduce variable_names to selection
-           variable_names = [variable_names[i] for i in self.selection_mask_]

            if X_units is not None:
-               X_units = [X_units[i] for i in self.selection_mask_]
                self.X_units_ = copy.deepcopy(X_units)

            # Re-perform data validation and feature name updating
-           X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
            # Update feature names with selected variable names
            self.feature_names_in_ = _check_feature_names_in(self, variable_names)
            self.display_feature_names_in_ = self.feature_names_in_
            print(f"Using features {self.feature_names_in_}")
@@ -1517,22 +1632,29 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        else:
            X, y = denoise(X, y, Xresampled=Xresampled, random_state=random_state)

-       return X, y, variable_names, X_units, y_units

-   def _run(self, X, y, mutated_params, weights, seed):
        """
        Run the symbolic regression fitting process on the julia backend.

        Parameters
        ----------
-       X : ndarray | pandas.DataFrame
            Training data of shape `(n_samples, n_features)`.
-       y : ndarray | pandas.DataFrame
            Target values of shape `(n_samples,)` or `(n_samples, n_targets)`.
            Will be cast to `X`'s dtype if necessary.
-       mutated_params : dict[str, Any]
-           Dictionary of mutated versions of some parameters passed in __init__.
-       weights : ndarray | pandas.DataFrame
            Weight array of the same shape as `y`.
            Each element is how to weight the mean-square-error loss
            for that particular element of y.
@@ -1551,24 +1673,27 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        """
        # Need to be global as we don't want to recreate/reinstate julia for
        # every new instance of PySRRegressor
-       global already_ran

        # These are the parameters which may be modified from the ones
        # specified in init, so we define them here locally:
-       binary_operators = mutated_params["binary_operators"]
-       unary_operators = mutated_params["unary_operators"]
-       maxdepth = mutated_params["maxdepth"]
-       constraints = mutated_params["constraints"]
        nested_constraints = self.nested_constraints
        complexity_of_operators = self.complexity_of_operators
-       multithreading = mutated_params["multithreading"]
        cluster_manager = self.cluster_manager
-       batch_size = mutated_params["batch_size"]
-       update_verbosity = mutated_params["update_verbosity"]
-       progress = mutated_params["progress"]

        # Start julia backend processes
-       if not already_ran and update_verbosity != 0:
            print("Compiling Julia backend...")

        if cluster_manager is not None:
@@ -1607,6 +1732,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
            complexity_of_operators_str += f"({k}) => {v}, "
        complexity_of_operators_str += ")"
        complexity_of_operators = jl.seval(complexity_of_operators_str)

        custom_loss = jl.seval(
            str(self.elementwise_loss)
@@ -1643,16 +1772,30 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
            optimize=self.weight_optimize,
        )

        # Call to Julia backend.
        # See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl
        options = SymbolicRegression.Options(
-           binary_operators=jl.seval(str(binary_operators).replace("'", "")),
-           unary_operators=jl.seval(str(unary_operators).replace("'", "")),
            bin_constraints=jl_array(bin_constraints),
            una_constraints=jl_array(una_constraints),
            complexity_of_operators=complexity_of_operators,
            complexity_of_constants=self.complexity_of_constants,
-           complexity_of_variables=self.complexity_of_variables,
            nested_constraints=nested_constraints,
            elementwise_loss=custom_loss,
            loss_function=custom_full_objective,
@@ -1667,6 +1810,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
            # These have the same name:
            parsimony=self.parsimony,
            dimensional_constraint_penalty=self.dimensional_constraint_penalty,
            alpha=self.alpha,
            maxdepth=maxdepth,
            fast_cycle=self.fast_cycle,
@@ -1678,9 +1822,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
            fraction_replaced_hof=self.fraction_replaced_hof,
            should_simplify=self.should_simplify,
            should_optimize_constants=self.should_optimize_constants,
-           warmup_maxsize_by=(
-               0.0 if self.warmup_maxsize_by is None else self.warmup_maxsize_by
-           ),
            use_frequency=self.use_frequency,
            use_frequency_in_tournament=self.use_frequency_in_tournament,
            adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
@@ -1787,7 +1929,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        if self.delete_tempfiles:
            shutil.rmtree(self.tempdir_)

-       already_ran = True

        return self

@@ -1797,9 +1939,12 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        y,
        Xresampled=None,
        weights=None,
-       variable_names: Optional[List[str]] = None,
-       X_units: Optional[List[str]] = None,
-       y_units: Optional[List[str]] = None,
    ) -> "PySRRegressor":
        """
        Search for equations to fit the dataset and store them in `self.equations_`.
@@ -1858,15 +2003,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        self.selection_mask_ = None
        self.julia_state_stream_ = None
        self.julia_options_stream_ = None
        self.X_units_ = None
        self.y_units_ = None

-       random_state = check_random_state(self.random_state)  # For np random
-       seed = random_state.get_state()[1][0]  # For julia random
-
        self._setup_equation_file()

-       mutated_params = self._validate_and_set_init_params()

        (
            X,
@@ -1874,10 +2017,18 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
            Xresampled,
            weights,
            variable_names,
            X_units,
            y_units,
        ) = self._validate_and_set_fit_params(
-           X, y, Xresampled, weights, variable_names, X_units, y_units
        )

        if X.shape[0] > 10000 and not self.batching:
@@ -1891,9 +2042,21 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                "More datapoints will lower the search speed."
            )

        # Pre transformations (feature selection and denoising)
-       X, y, variable_names, X_units, y_units = self._pre_transform_training_data(
-           X, y, Xresampled, variable_names, X_units, y_units, random_state
        )

        # Warn about large feature counts (still warn if feature count is large
@@ -1903,13 +2066,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                "Note: you are running with 10 features or more. "
                "Genetic algorithms like used in PySR scale poorly with large numbers of features. "
                "You should run PySR for more `niterations` to ensure it can find "
-               "the correct variables, "
-               "or, alternatively, do a dimensionality reduction beforehand. "
-               "For example, `X = PCA(n_components=6).fit_transform(X)`, "
-               "using scikit-learn's `PCA` class, "
-               "will reduce the number of features to 6 in an interpretable way, "
-               "as each resultant feature "
-               "will be a linear combination of the original features. "
            )

        # Assertion checks
@@ -1920,6 +2077,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
            X,
            use_custom_variable_names,
            variable_names,
            weights,
            y,
            X_units,
@@ -1932,7 +2090,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
            self._checkpoint()

        # Perform the search:
-       self._run(X, y, mutated_params, weights=weights, seed=seed)

        # Then, after fit, we save again, so the pickle file contains
        # the equations:
@@ -1941,7 +2099,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):

        return self

-   def refresh(self, checkpoint_file=None):
        """
        Update self.equations_ with any new options passed.

@@ -1950,11 +2108,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):

        Parameters
        ----------
-       checkpoint_file : str
            Path to checkpoint hall of fame file to be loaded.
            The default will use the set `equation_file_`.
        """
-       if checkpoint_file:
            self.equation_file_ = checkpoint_file
            self.equation_file_contents_ = None
        check_is_fitted(self, attributes=["equation_file_"])
@@ -2006,7 +2164,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
            if self.selection_mask_ is not None:
                # RangeIndex enforces column order allowing columns to
                # be correctly filtered with self.selection_mask_
-               X = X.iloc[:, self.selection_mask_]
            X.columns = self.feature_names_in_
        # Without feature information, CallableEquation/lambda_format equations
        # require that the column order of X matches that of the X used during
@@ -2016,14 +2174,16 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        # reordered/reindexed to match those of the transformed (denoised and
        # feature selected) X in fit.
        X = X.reindex(columns=self.feature_names_in_)
-       X = self._validate_data(X, reset=False)

        try:
-           if self.nout_ > 1:
                return np.stack(
                    [eq["lambda_format"](X) for eq in best_equation], axis=1
                )
-           return best_equation["lambda_format"](X)
        except Exception as error:
            raise ValueError(
                "Failed to evaluate the expression. "
@@ -2053,9 +2213,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        """
        self.refresh()
        best_equation = self.get_best(index=index)
-       if self.nout_ > 1:
            return [eq["sympy_format"] for eq in best_equation]
-       return best_equation["sympy_format"]

    def latex(self, index=None, precision=3):
        """
@@ -2115,9 +2277,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        self.set_params(output_jax_format=True)
        self.refresh()
        best_equation = self.get_best(index=index)
-       if self.nout_ > 1:
            return [eq["jax_format"] for eq in best_equation]
-       return best_equation["jax_format"]

    def pytorch(self, index=None):
        """
@@ -2145,9 +2309,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        self.set_params(output_torch_format=True)
        self.refresh()
        best_equation = self.get_best(index=index)
-       if self.nout_ > 1:
            return [eq["torch_format"] for eq in best_equation]
-       return best_equation["torch_format"]

    def _read_equation_file(self):
        """Read the hall of fame file created by `SymbolicRegression.jl`."""
@@ -2246,10 +2411,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        lastComplexity = 0
        sympy_format = []
        lambda_format = []
-       if self.output_jax_format:
-           jax_format = []
-       if self.output_torch_format:
-           torch_format = []

        for _, eqn_row in output.iterrows():
            eqn = pysr2sympy(
@@ -2361,7 +2524,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
        """
        self.refresh()

-       if self.nout_ > 1:
            if indices is not None:
                assert isinstance(indices, list)
                assert isinstance(indices[0], list)
@@ -2370,7 +2533,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
            table_string = sympy2multilatextable(
                self.equations_, indices=indices, precision=precision, columns=columns
            )
-       else:
            if indices is not None:
                assert isinstance(indices, list)
                assert isinstance(indices[0], int)
@@ -2378,15 +2541,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
            table_string = sympy2latextable(
                self.equations_, indices=indices, precision=precision, columns=columns
            )

-       preamble_string = [
-           r"\usepackage{breqn}",
-           r"\usepackage{booktabs}",
-           "",
-           "...",
-           "",
-       ]
-       return "\n".join(preamble_string + [table_string])


def idx_model_selection(equations: pd.DataFrame, model_selection: str):
@@ -2404,3 +2565,30 @@ def idx_model_selection(equations: pd.DataFrame, model_selection: str):
            f"{model_selection} is not a valid model selection strategy."
        )
    return chosen_idx
  import sys
  import tempfile
  import warnings
+ from dataclasses import dataclass, fields
  from datetime import datetime
  from io import StringIO
  from multiprocessing import cpu_count
  from pathlib import Path
+ from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast

  import numpy as np
  import pandas as pd
+ from numpy import ndarray
+ from numpy.typing import NDArray
  from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
  from sklearn.utils import check_array, check_consistent_length, check_random_state
+ from sklearn.utils.validation import _check_feature_names_in  # type: ignore
+ from sklearn.utils.validation import check_is_fitted

  from .denoising import denoise, multi_denoise
  from .deprecated import DEPRECATED_KWARGS
  from .export_jax import sympy2jax
+ from .export_latex import (
+     sympy2latex,
+     sympy2latextable,
+     sympy2multilatextable,
+     with_preamble,
+ )
  from .export_numpy import sympy2numpy
  from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2sympy
  from .export_torch import sympy2torch

      _load_cluster_manager,
      jl_array,
      jl_deserialize,
+     jl_is_function,
      jl_serialize,
  )
  from .julia_import import SymbolicRegression, jl
  from .utils import (
+     ArrayLike,
+     PathLike,
      _csv_filename_to_pkl_filename,
      _preprocess_julia_floats,
      _safe_check_feature_names_in,
      _subscriptify,
+     _suggest_keywords,
  )

+ ALREADY_RAN = False


  def _process_constraints(binary_operators, unary_operators, constraints):

          "and underscores are allowed."
      )
      if (extra_sympy_mappings is None) or (
+         function_name not in extra_sympy_mappings
      ):
          raise ValueError(
              f"Custom function {function_name} is not defined in `extra_sympy_mappings`. "

      X,
      use_custom_variable_names,
      variable_names,
+     complexity_of_variables,
      weights,
      y,
      X_units,

              "and underscores are allowed."
          )
          assert_valid_sympy_symbol(var_name)
+     if (
+         isinstance(complexity_of_variables, list)
+         and len(complexity_of_variables) != X.shape[1]
+     ):
+         raise ValueError(
+             "The number of elements in `complexity_of_variables` must equal the number of features in `X`."
+         )
      if X_units is not None and len(X_units) != X.shape[1]:
          raise ValueError(
              "The number of units in `X_units` must equal the number of features in `X`."

  VALID_OPTIMIZER_ALGORITHMS = ["BFGS", "NelderMead"]


+ @dataclass
+ class _DynamicallySetParams:
+     """Defines some parameters that are set at runtime."""
+
+     binary_operators: List[str]
+     unary_operators: List[str]
+     maxdepth: int
+     constraints: Dict[str, str]
+     multithreading: bool
+     batch_size: int
+     update_verbosity: int
+     progress: bool
+     warmup_maxsize_by: float
+
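Not part of this diff: a minimal, self-contained sketch of the None-override pattern that `_validate_and_modify_params` (further below) applies to this dataclass. The `_Defaults` class and `user_settings` dict here are illustrative stand-ins, not PySR code:

    from dataclasses import dataclass, fields

    @dataclass
    class _Defaults:  # hypothetical stand-in for _DynamicallySetParams
        batch_size: int
        progress: bool

    defaults = _Defaults(batch_size=1, progress=True)
    user_settings = {"batch_size": None, "progress": False}  # None means "unset"

    # Any field the user left as None keeps its default; others override it.
    for f in fields(_Defaults):
        value = user_settings[f.name]
        if value is not None:
            setattr(defaults, f.name, value)

    print(defaults)  # _Defaults(batch_size=1, progress=False)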
  class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
      """
      High-performance symbolic regression algorithm.

          `idx` argument to the function, which is `nothing`
          for non-batched, and a 1D array of indices for batched.
          Default is `None`.
+     complexity_of_operators : dict[str, Union[int, float]]
          If you would like to use a complexity other than 1 for an
          operator, specify the complexity here. For example,
          `{"sin": 2, "+": 1}` would give a complexity of 2 for each use

          numbers for a complexity, and the total complexity of a tree
          will be rounded to the nearest integer after computing.
          Default is `None`.
+     complexity_of_constants : int | float
          Complexity of constants. Default is `1`.
+     complexity_of_variables : int | float
+         Global complexity of variables. To set different complexities for
+         different variables, pass a list of complexities to the `fit` method
+         with keyword `complexity_of_variables`. You cannot use both.
+         Default is `1`.
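To illustrate the docstring above, a sketch of the two mutually exclusive ways to set variable complexities; the data here is made up:

    import numpy as np
    from pysr import PySRRegressor

    X = np.random.randn(100, 2)
    y = X[:, 0] + X[:, 1]

    # Global complexity for every variable, set at __init__:
    model = PySRRegressor(complexity_of_variables=2)

    # Or per-feature complexities, passed to fit instead (one entry per column):
    model = PySRRegressor()
    model.fit(X, y, complexity_of_variables=[2, 3])

    # Setting it in both places raises a ValueError (see the check added in
    # _validate_and_set_fit_params below).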
      parsimony : float
          Multiplicative factor for how much to punish complexity.
          Default is `0.0032`.
      dimensional_constraint_penalty : float
          Additive penalty for if dimensional analysis of an expression fails.
          By default, this is `1000.0`.
+     dimensionless_constants_only : bool
+         Whether to only search for dimensionless constants, if using units.
+         Default is `False`.
      use_frequency : bool
          Whether to measure the frequency of complexities, and use that
          instead of parsimony to explore equation space. Will naturally

          Units of each variable in the training dataset, `y`.
      nout_ : int
          Number of output dimensions.
+     selection_mask_ : ndarray of shape (`n_features_in_`,)
+         Mask of which features of `X` to use when `select_k_features` is set.
      tempdir_ : Path
          Path to the temporary equations directory.
+     equation_file_ : Union[str, Path]
          Output equation file name produced by the julia backend.
      julia_state_stream_ : ndarray
          The serialized state for the julia SymbolicRegression.jl backend (after fitting),
          stored as an array of uint8, produced by Julia's Serialization.serialize function.
      julia_options_stream_ : ndarray
          The serialized julia options, stored as an array of uint8,
          produced by Julia's Serialization.serialize function.
      equation_file_contents_ : list[pandas.DataFrame]
          Contents of the equation file output by the Julia backend.
      show_pickle_warnings_ : bool

      ```
      """

+     equations_: Union[pd.DataFrame, List[pd.DataFrame], None]
+     n_features_in_: int
+     feature_names_in_: ArrayLike[str]
+     display_feature_names_in_: ArrayLike[str]
+     complexity_of_variables_: Union[int, float, List[Union[int, float]], None]
+     X_units_: Union[ArrayLike[str], None]
+     y_units_: Union[str, ArrayLike[str], None]
+     nout_: int
+     selection_mask_: Union[NDArray[np.bool_], None]
+     tempdir_: Path
+     equation_file_: PathLike
+     julia_state_stream_: Union[NDArray[np.uint8], None]
+     julia_options_stream_: Union[NDArray[np.uint8], None]
+     equation_file_contents_: Union[List[pd.DataFrame], None]
+     show_pickle_warnings_: bool
+
      def __init__(
          self,
          model_selection: Literal["best", "accuracy", "score"] = "best",

          loss_function: Optional[str] = None,
          complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
          complexity_of_constants: Union[int, float] = 1,
+         complexity_of_variables: Optional[Union[int, float]] = None,
          parsimony: float = 0.0032,
          dimensional_constraint_penalty: Optional[float] = None,
+         dimensionless_constants_only: bool = False,
          use_frequency: bool = True,
          use_frequency_in_tournament: bool = True,
          adaptive_parsimony_scaling: float = 20.0,

          self.complexity_of_variables = complexity_of_variables
          self.parsimony = parsimony
          self.dimensional_constraint_penalty = dimensional_constraint_penalty
+         self.dimensionless_constants_only = dimensionless_constants_only
          self.use_frequency = use_frequency
          self.use_frequency_in_tournament = use_frequency_in_tournament
          self.adaptive_parsimony_scaling = adaptive_parsimony_scaling

                  updated_kwarg_name = DEPRECATED_KWARGS[k]
                  setattr(self, updated_kwarg_name, v)
                  warnings.warn(
+                     f"`{k}` has been renamed to `{updated_kwarg_name}` in PySRRegressor. "
                      "Please use that instead.",
                      FutureWarning,
                  )
              # Handle kwargs that have been moved to the fit method
              elif k in ["weights", "variable_names", "Xresampled"]:
                  warnings.warn(
+                     f"`{k}` is a data-dependent parameter and should be passed when fit is called. "
+                     f"Ignoring parameter; please pass `{k}` during the call to fit instead.",
                      FutureWarning,
                  )
              elif k == "julia_project":

                      FutureWarning,
                  )
              else:
+                 suggested_keywords = _suggest_keywords(PySRRegressor, k)
+                 err_msg = (
+                     f"`{k}` is not a valid keyword argument for PySRRegressor."
                  )
+                 if len(suggested_keywords) > 0:
+                     err_msg += f" Did you mean {', '.join(map(lambda s: f'`{s}`', suggested_keywords))}?"
+                 raise TypeError(err_msg)
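The new `_suggest_keywords` branch turns misspelled constructor kwargs into actionable errors. A sketch of the expected behavior; the exact suggestions depend on the matching logic in `pysr.utils`:

    from pysr import PySRRegressor

    try:
        PySRRegressor(niterationss=40)  # note the typo
    except TypeError as err:
        # e.g. "`niterationss` is not a valid keyword argument for
        # PySRRegressor. Did you mean `niterations`?"
        print(err)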
      @classmethod
      def from_file(
          cls,
+         equation_file: PathLike,
          *,
+         binary_operators: Optional[List[str]] = None,
+         unary_operators: Optional[List[str]] = None,
+         n_features_in: Optional[int] = None,
+         feature_names_in: Optional[ArrayLike[str]] = None,
+         selection_mask: Optional[NDArray[np.bool_]] = None,
+         nout: int = 1,
          verbosity=1,
          **pysr_kwargs,
      ):

          Parameters
          ----------
+         equation_file : str or Path
              Path to a pickle file containing a saved model, or a csv file
              containing equations.
          binary_operators : list[str]

          feature_names_in : list[str]
              Names of the features passed to the model.
              Not needed if loading from a pickle file.
+         selection_mask : NDArray[np.bool_]
+             If using `select_k_features`, you must pass `model.selection_mask_` here.
              Not needed if loading from a pickle file.
          nout : int
              Number of outputs of the model.

          # TODO: copy .bkup file if exists.
          model = cls(
+             equation_file=str(equation_file),
              binary_operators=binary_operators,
              unary_operators=unary_operators,
              **pysr_kwargs,

          model.display_feature_names_in_ = feature_names_in

          if selection_mask is None:
+             model.selection_mask_ = np.ones(n_features_in, dtype=np.bool_)
          else:
              model.selection_mask_ = selection_mask
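A usage sketch for the updated `from_file` signature; the file name is hypothetical, and the first form assumes the pickle saved during fitting still sits alongside the CSV:

    from pysr import PySRRegressor

    # Preferred: load the pickled model saved next to the equation CSV.
    model = PySRRegressor.from_file("hall_of_fame.csv")

    # From a bare CSV, the search configuration must be re-supplied:
    model = PySRRegressor.from_file(
        "hall_of_fame.csv",
        binary_operators=["+", "*"],
        unary_operators=["cos"],
        n_features_in=2,
        nout=1,
    )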
          all_equations = equations

          for i, equations in enumerate(all_equations):
+             selected = pd.Series([""] * len(equations), index=equations.index)
              chosen_row = idx_model_selection(equations, self.model_selection)
              selected[chosen_row] = ">>>>"
              repr_equations = pd.DataFrame(

          Handle pickle serialization for PySRRegressor.

          The Scikit-learn standard requires estimators to be serializable via
+         `pickle.dumps()`. However, some attributes do not support pickling
+         and need to be hidden, such as the JAX and Torch representations.
          """
          state = self.__dict__
          show_pickle_warning = not (

      @property
      def julia_options_(self):
+         """The deserialized julia options."""
          return jl_deserialize(self.julia_options_stream_)

      @property
      def julia_state_(self):
+         """The deserialized state."""
          return jl_deserialize(self.julia_state_stream_)

      @property

          )
          return self.julia_state_

+     def get_best(self, index=None) -> Union[pd.Series, List[pd.Series]]:
          """
          Get best equation using `model_selection`.

              Raised when an invalid model selection strategy is provided.
          """
          check_is_fitted(self, attributes=["equations_"])

          if index is not None:
              if isinstance(self.equations_, list):

                      index, list
                  ), "With multiple output features, index must be a list."
                  return [eq.iloc[i] for eq, i in zip(self.equations_, index)]
+             else:
+                 equations_ = cast(pd.DataFrame, self.equations_)
+                 return cast(pd.Series, equations_.iloc[index])

          if isinstance(self.equations_, list):
              return [
+                 cast(pd.Series, eq.loc[idx_model_selection(eq, self.model_selection)])
                  for eq in self.equations_
              ]
+         else:
+             equations_ = cast(pd.DataFrame, self.equations_)
+             return cast(
+                 pd.Series,
+                 equations_.loc[idx_model_selection(equations_, self.model_selection)],
+             )
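With the annotated return type, callers get a single `pd.Series` for single-output fits and a list of them for multi-output fits. A sketch, assuming a fitted `model`:

    best = model.get_best()  # single output: one pandas Series
    print(best["equation"], best["complexity"], best["loss"])

    bests = model.get_best(index=[2, 3])  # multi-output: one row index per output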

      def _setup_equation_file(self):
          """

              self.equation_file_ = self.equation_file
          self.equation_file_contents_ = None

+     def _validate_and_modify_params(self) -> _DynamicallySetParams:
          """
          Ensure parameters passed at initialization are valid.

                  f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}"
              )

+         param_container = _DynamicallySetParams(
+             binary_operators=["+", "*", "-", "/"],
+             unary_operators=[],
+             maxdepth=self.maxsize,
+             constraints={},
+             multithreading=self.procs != 0 and self.cluster_manager is None,
+             batch_size=1,
+             update_verbosity=int(self.verbosity),
+             progress=self.progress,
+             warmup_maxsize_by=0.0,
+         )
+
+         for param_name in map(lambda x: x.name, fields(_DynamicallySetParams)):
+             user_param_value = getattr(self, param_name)
+             if user_param_value is None:
+                 # Leave as the default in DynamicallySetParams
+                 ...
              else:
+                 # If user has specified it, we will override the default.
+                 # However, there are some special cases to mutate it:
+                 new_param_value = _mutate_parameter(param_name, user_param_value)
+                 setattr(param_container, param_name, new_param_value)
+         # TODO: This should just be part of the __init__ of _DynamicallySetParams

          assert (
+             len(param_container.binary_operators) > 0
+             or len(param_container.unary_operators) > 0
+         ), "At least one operator must be provided."

+         return param_container
      def _validate_and_set_fit_params(
+         self,
+         X,
+         y,
+         Xresampled,
+         weights,
+         variable_names,
+         complexity_of_variables,
+         X_units,
+         y_units,
+     ) -> Tuple[
+         ndarray,
+         ndarray,
+         Optional[ndarray],
+         Optional[ndarray],
+         ArrayLike[str],
+         Union[int, float, List[Union[int, float]]],
+         Optional[ArrayLike[str]],
+         Optional[Union[str, ArrayLike[str]]],
+     ]:
          """
          Validate the parameters passed to the :term`fit` method.

              Weight array of the same shape as `y`.
              Each element is how to weight the mean-square-error loss
              for that particular element of y.
+         variable_names : ndarray of length n_features
+             Names of each feature in the training dataset, `X`.
+         complexity_of_variables : int | float | list[int | float]
+             Complexity of each feature in the training dataset, `X`.
          X_units : list[str] of length n_features
+             Units of each feature in the training dataset, `X`.
          y_units : str | list[str] of length n_out
+             Units of each feature in the training dataset, `y`.

          Returns
          -------

                  "Please use valid names instead."
              )

+         if (
+             complexity_of_variables is not None
+             and self.complexity_of_variables is not None
+         ):
+             raise ValueError(
+                 "You cannot set `complexity_of_variables` at both `fit` and `__init__`. "
+                 "Pass it at `__init__` to set it to global default, OR use `fit` to set it for "
+                 "each variable individually."
+             )
+         elif complexity_of_variables is not None:
+             complexity_of_variables = complexity_of_variables
+         elif self.complexity_of_variables is not None:
+             complexity_of_variables = self.complexity_of_variables
+         else:
+             complexity_of_variables = 1
+
          # Data validation and feature name fetching via sklearn
          # This method sets the n_features_in_ attribute
          if Xresampled is not None:

          if weights is not None:
              weights = check_array(weights, ensure_2d=False)
              check_consistent_length(weights, y)
+         X, y = self._validate_data_X_y(X, y)
          self.feature_names_in_ = _safe_check_feature_names_in(
              self, variable_names, generate_names=False
          )

              self.display_feature_names_in_ = np.array(
                  [f"x{_subscriptify(i)}" for i in range(X.shape[1])]
              )
+             variable_names = self.feature_names_in_
          else:
              self.display_feature_names_in_ = self.feature_names_in_
+             variable_names = self.feature_names_in_

          # Handle multioutput data
          if len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1):

          else:
              raise NotImplementedError("y shape not supported!")

+         self.complexity_of_variables_ = copy.deepcopy(complexity_of_variables)
          self.X_units_ = copy.deepcopy(X_units)
          self.y_units_ = copy.deepcopy(y_units)

+         return (
+             X,
+             y,
+             Xresampled,
+             weights,
+             variable_names,
+             complexity_of_variables,
+             X_units,
+             y_units,
+         )
+
+     def _validate_data_X_y(self, X, y) -> Tuple[ndarray, ndarray]:
+         raw_out = self._validate_data(X=X, y=y, reset=True, multi_output=True)  # type: ignore
+         return cast(Tuple[ndarray, ndarray], raw_out)
+
+     def _validate_data_X(self, X) -> Tuple[ndarray]:
+         raw_out = self._validate_data(X=X, reset=False)  # type: ignore
+         return cast(Tuple[ndarray], raw_out)

      def _pre_transform_training_data(
+         self,
+         X: ndarray,
+         y: ndarray,
+         Xresampled: Union[ndarray, None],
+         variable_names: ArrayLike[str],
+         complexity_of_variables: Union[int, float, List[Union[int, float]]],
+         X_units: Union[ArrayLike[str], None],
+         y_units: Union[ArrayLike[str], str, None],
+         random_state: np.random.RandomState,
      ):
          """
          Transform the training data before fitting the symbolic regressor.

          Parameters
          ----------
+         X : ndarray
              Training data of shape (n_samples, n_features).
+         y : ndarray
              Target values of shape (n_samples,) or (n_samples, n_targets).
              Will be cast to X's dtype if necessary.
+         Xresampled : ndarray | None
              Resampled training data, of shape `(n_resampled, n_features)`,
              used for denoising.
          variable_names : list[str]
              Names of each variable in the training dataset, `X`.
              Of length `n_features`.
+         complexity_of_variables : int | float | list[int | float]
+             Complexity of each variable in the training dataset, `X`.
          X_units : list[str]
              Units of each variable in the training dataset, `X`.
          y_units : str | list[str]

          """
          # Feature selection transformation
          if self.select_k_features:
+             selection_mask = run_feature_selection(
                  X, y, self.select_k_features, random_state=random_state
              )
+             X = X[:, selection_mask]

              if Xresampled is not None:
+                 Xresampled = Xresampled[:, selection_mask]

              # Reduce variable_names to selection
+             variable_names = cast(
+                 ArrayLike[str],
+                 [
+                     variable_names[i]
+                     for i in range(len(variable_names))
+                     if selection_mask[i]
+                 ],
+             )
+
+             if isinstance(complexity_of_variables, list):
+                 complexity_of_variables = [
+                     complexity_of_variables[i]
+                     for i in range(len(complexity_of_variables))
+                     if selection_mask[i]
+                 ]
+                 self.complexity_of_variables_ = copy.deepcopy(complexity_of_variables)

              if X_units is not None:
+                 X_units = cast(
+                     ArrayLike[str],
+                     [X_units[i] for i in range(len(X_units)) if selection_mask[i]],
+                 )
                  self.X_units_ = copy.deepcopy(X_units)

              # Re-perform data validation and feature name updating
+             X, y = self._validate_data_X_y(X, y)
              # Update feature names with selected variable names
+             self.selection_mask_ = selection_mask
              self.feature_names_in_ = _check_feature_names_in(self, variable_names)
              self.display_feature_names_in_ = self.feature_names_in_
              print(f"Using features {self.feature_names_in_}")

          else:
              X, y = denoise(X, y, Xresampled=Xresampled, random_state=random_state)

+         return X, y, variable_names, complexity_of_variables, X_units, y_units
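The selection mask is now stored as a boolean array over the original features (rather than integer indices). A sketch with made-up data:

    import numpy as np
    from pysr import PySRRegressor

    X = np.random.randn(500, 5)
    y = X[:, 2] ** 2 + X[:, 3] ** 2

    model = PySRRegressor(select_k_features=2)
    model.fit(X, y)
    print(model.selection_mask_)  # e.g. array([False, False, True, True, False])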

+     def _run(
+         self,
+         X: ndarray,
+         y: ndarray,
+         runtime_params: _DynamicallySetParams,
+         weights: Optional[ndarray],
+         seed: int,
+     ):
          """
          Run the symbolic regression fitting process on the julia backend.

          Parameters
          ----------
+         X : ndarray
              Training data of shape `(n_samples, n_features)`.
+         y : ndarray
              Target values of shape `(n_samples,)` or `(n_samples, n_targets)`.
              Will be cast to `X`'s dtype if necessary.
+         runtime_params : DynamicallySetParams
+             Dynamically set versions of some parameters passed in __init__.
+         weights : ndarray | None
              Weight array of the same shape as `y`.
              Each element is how to weight the mean-square-error loss
              for that particular element of y.

          """
          # Need to be global as we don't want to recreate/reinstate julia for
          # every new instance of PySRRegressor
+         global ALREADY_RAN

          # These are the parameters which may be modified from the ones
          # specified in init, so we define them here locally:
+         binary_operators = runtime_params.binary_operators
+         unary_operators = runtime_params.unary_operators
+         maxdepth = runtime_params.maxdepth
+         constraints = runtime_params.constraints
+         multithreading = runtime_params.multithreading
+         batch_size = runtime_params.batch_size
+         update_verbosity = runtime_params.update_verbosity
+         progress = runtime_params.progress
+         warmup_maxsize_by = runtime_params.warmup_maxsize_by
+
          nested_constraints = self.nested_constraints
          complexity_of_operators = self.complexity_of_operators
+         complexity_of_variables = self.complexity_of_variables_
          cluster_manager = self.cluster_manager

          # Start julia backend processes
+         if not ALREADY_RAN and update_verbosity != 0:
              print("Compiling Julia backend...")

          if cluster_manager is not None:

              complexity_of_operators_str += f"({k}) => {v}, "
          complexity_of_operators_str += ")"
          complexity_of_operators = jl.seval(complexity_of_operators_str)
+         # TODO: Refactor this into helper function
+
+         if isinstance(complexity_of_variables, list):
+             complexity_of_variables = jl_array(complexity_of_variables)

          custom_loss = jl.seval(
              str(self.elementwise_loss)

              optimize=self.weight_optimize,
          )

+         jl_binary_operators: List[Any] = []
+         jl_unary_operators: List[Any] = []
+         for input_list, output_list, name in [
+             (binary_operators, jl_binary_operators, "binary"),
+             (unary_operators, jl_unary_operators, "unary"),
+         ]:
+             for op in input_list:
+                 jl_op = jl.seval(op)
+                 if not jl_is_function(jl_op):
+                     raise ValueError(
+                         f"When building `{name}_operators`, `'{op}'` did not return a Julia function"
+                     )
+                 output_list.append(jl_op)
+
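Each operator string is now `seval`'d individually and checked with `jl_is_function`, so a bad operator fails before the search starts. A sketch; `cube` is an illustrative custom operator, not part of the diff:

    from pysr import PySRRegressor

    # Built-in and custom operators are all given as Julia source strings:
    model = PySRRegressor(
        binary_operators=["+", "*"],
        unary_operators=["cube(x) = x^3"],  # evaluates to a Julia function
    )
    # A string that evaluates to a non-function, e.g. "3", would raise:
    #   ValueError: When building `unary_operators`, `'3'` did not return a Julia function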
          # Call to Julia backend.
          # See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl
          options = SymbolicRegression.Options(
+             binary_operators=jl_array(jl_binary_operators, dtype=jl.Function),
+             unary_operators=jl_array(jl_unary_operators, dtype=jl.Function),
              bin_constraints=jl_array(bin_constraints),
              una_constraints=jl_array(una_constraints),
              complexity_of_operators=complexity_of_operators,
              complexity_of_constants=self.complexity_of_constants,
+             complexity_of_variables=complexity_of_variables,
              nested_constraints=nested_constraints,
              elementwise_loss=custom_loss,
              loss_function=custom_full_objective,

              # These have the same name:
              parsimony=self.parsimony,
              dimensional_constraint_penalty=self.dimensional_constraint_penalty,
+             dimensionless_constants_only=self.dimensionless_constants_only,
              alpha=self.alpha,
              maxdepth=maxdepth,
              fast_cycle=self.fast_cycle,

              fraction_replaced_hof=self.fraction_replaced_hof,
              should_simplify=self.should_simplify,
              should_optimize_constants=self.should_optimize_constants,
+             warmup_maxsize_by=warmup_maxsize_by,
              use_frequency=self.use_frequency,
              use_frequency_in_tournament=self.use_frequency_in_tournament,
              adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,

          if self.delete_tempfiles:
              shutil.rmtree(self.tempdir_)

+         ALREADY_RAN = True

          return self

          y,
          Xresampled=None,
          weights=None,
+         variable_names: Optional[ArrayLike[str]] = None,
+         complexity_of_variables: Optional[
+             Union[int, float, List[Union[int, float]]]
+         ] = None,
+         X_units: Optional[ArrayLike[str]] = None,
+         y_units: Optional[Union[str, ArrayLike[str]]] = None,
      ) -> "PySRRegressor":
          """
          Search for equations to fit the dataset and store them in `self.equations_`.

          self.selection_mask_ = None
          self.julia_state_stream_ = None
          self.julia_options_stream_ = None
+         self.complexity_of_variables_ = None
          self.X_units_ = None
          self.y_units_ = None

          self._setup_equation_file()

+         runtime_params = self._validate_and_modify_params()

          (
              X,

              Xresampled,
              weights,
              variable_names,
+             complexity_of_variables,
              X_units,
              y_units,
          ) = self._validate_and_set_fit_params(
+             X,
+             y,
+             Xresampled,
+             weights,
+             variable_names,
+             complexity_of_variables,
+             X_units,
+             y_units,
          )

          if X.shape[0] > 10000 and not self.batching:

                  "More datapoints will lower the search speed."
              )

+         random_state = check_random_state(self.random_state)  # For np random
+         seed = cast(int, random_state.randint(0, 2**31 - 1))  # For julia random
+
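The Julia seed is now drawn from the validated NumPy `RandomState` via `randint` rather than by peeking at the generator's internal state. Exactly reproducible searches still require the deterministic configuration documented by PySR:

    from pysr import PySRRegressor

    model = PySRRegressor(
        random_state=0,
        deterministic=True,   # required for exactly reproducible runs
        procs=0,
        multithreading=False,
    )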
          # Pre transformations (feature selection and denoising)
+         X, y, variable_names, complexity_of_variables, X_units, y_units = (
+             self._pre_transform_training_data(
+                 X,
+                 y,
+                 Xresampled,
+                 variable_names,
+                 complexity_of_variables,
+                 X_units,
+                 y_units,
+                 random_state,
+             )
          )

          # Warn about large feature counts (still warn if feature count is large

                  "Note: you are running with 10 features or more. "
                  "Genetic algorithms like used in PySR scale poorly with large numbers of features. "
                  "You should run PySR for more `niterations` to ensure it can find "
+                 "the correct variables, and consider using a larger `maxsize`."
              )

          # Assertion checks

              X,
              use_custom_variable_names,
              variable_names,
+             complexity_of_variables,
              weights,
              y,
              X_units,

              self._checkpoint()

          # Perform the search:
+         self._run(X, y, runtime_params, weights=weights, seed=seed)

          # Then, after fit, we save again, so the pickle file contains
          # the equations:

          return self

+     def refresh(self, checkpoint_file: Optional[PathLike] = None) -> None:
          """
          Update self.equations_ with any new options passed.

          Parameters
          ----------
+         checkpoint_file : str or Path
              Path to checkpoint hall of fame file to be loaded.
              The default will use the set `equation_file_`.
          """
+         if checkpoint_file is not None:
              self.equation_file_ = checkpoint_file
              self.equation_file_contents_ = None
          check_is_fitted(self, attributes=["equation_file_"])

              if self.selection_mask_ is not None:
                  # RangeIndex enforces column order allowing columns to
                  # be correctly filtered with self.selection_mask_
+                 X = X[X.columns[self.selection_mask_]]
              X.columns = self.feature_names_in_
          # Without feature information, CallableEquation/lambda_format equations
          # require that the column order of X matches that of the X used during

          # reordered/reindexed to match those of the transformed (denoised and
          # feature selected) X in fit.
          X = X.reindex(columns=self.feature_names_in_)
+         X = self._validate_data_X(X)

          try:
+             if isinstance(best_equation, list):
+                 assert self.nout_ > 1
                  return np.stack(
                      [eq["lambda_format"](X) for eq in best_equation], axis=1
                  )
+             else:
+                 return best_equation["lambda_format"](X)
          except Exception as error:
              raise ValueError(
                  "Failed to evaluate the expression. "

          """
          self.refresh()
          best_equation = self.get_best(index=index)
+         if isinstance(best_equation, list):
+             assert self.nout_ > 1
              return [eq["sympy_format"] for eq in best_equation]
+         else:
+             return best_equation["sympy_format"]

      def latex(self, index=None, precision=3):
          """

          self.set_params(output_jax_format=True)
          self.refresh()
          best_equation = self.get_best(index=index)
+         if isinstance(best_equation, list):
+             assert self.nout_ > 1
              return [eq["jax_format"] for eq in best_equation]
+         else:
+             return best_equation["jax_format"]

      def pytorch(self, index=None):
          """

          self.set_params(output_torch_format=True)
          self.refresh()
          best_equation = self.get_best(index=index)
+         if isinstance(best_equation, list):
              return [eq["torch_format"] for eq in best_equation]
+         else:
+             return best_equation["torch_format"]

      def _read_equation_file(self):
          """Read the hall of fame file created by `SymbolicRegression.jl`."""

          lastComplexity = 0
          sympy_format = []
          lambda_format = []
+         jax_format = []
+         torch_format = []

          for _, eqn_row in output.iterrows():
              eqn = pysr2sympy(

          """
          self.refresh()

+         if isinstance(self.equations_, list):
              if indices is not None:
                  assert isinstance(indices, list)
                  assert isinstance(indices[0], list)

              table_string = sympy2multilatextable(
                  self.equations_, indices=indices, precision=precision, columns=columns
              )
+         elif isinstance(self.equations_, pd.DataFrame):
              if indices is not None:
                  assert isinstance(indices, list)
                  assert isinstance(indices[0], int)

              table_string = sympy2latextable(
                  self.equations_, indices=indices, precision=precision, columns=columns
              )
+         else:
+             raise ValueError(
+                 "Invalid type for equations_ to pass to `latex_table`. "
+                 "Expected a DataFrame or a list of DataFrames."
+             )

+         return with_preamble(table_string)
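`latex_table` now delegates the header to `with_preamble` from `pysr.export_latex`. Assuming a fitted model, the output is a complete LaTeX snippet:

    table = model.latex_table(precision=3)
    # `table` begins with the shared preamble (\usepackage{breqn},
    # \usepackage{booktabs}, ...) followed by the tabular body.
    print(table)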

  def idx_model_selection(equations: pd.DataFrame, model_selection: str):

          f"{model_selection} is not a valid model selection strategy."
      )
      return chosen_idx
+
+
+ def _mutate_parameter(param_name: str, param_value):
+     if param_name in ["binary_operators", "unary_operators"] and isinstance(
+         param_value, str
+     ):
+         return [param_value]
+
+     if param_name == "batch_size" and param_value < 1:
+         warnings.warn(
+             "Given `batch_size` must be greater than or equal to one. "
+             "`batch_size` has been increased to equal one."
+         )
+         return 1
+
+     if (
+         param_name == "progress"
+         and param_value == True
+         and "buffer" not in sys.stdout.__dir__()
+     ):
+         warnings.warn(
+             "Note: it looks like you are running in Jupyter. "
+             "The progress bar will be turned off."
+         )
+         return False
+
+     return param_value
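The special cases formerly inlined in `_validate_and_set_init_params` now live in this helper. Its behavior, sketched (`"square"` is an illustrative operator name):

    _mutate_parameter("unary_operators", "square")  # -> ["square"] (wrapped in a list)
    _mutate_parameter("batch_size", 0)              # warns, then -> 1
    _mutate_parameter("maxdepth", 20)               # no special case -> 20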
pysr/test/__main__.py CHANGED
@@ -1,4 +1,5 @@
  """CLI for running PySR's test suite."""
+
  import argparse

  from . import *
pysr/test/params.py CHANGED
@@ -1,6 +1,6 @@
  import inspect

- from .. import PySRRegressor
+ from pysr import PySRRegressor

  DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
  DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default
pysr/test/test.py CHANGED
@@ -11,12 +11,18 @@ import pandas as pd
11
  import sympy
12
  from sklearn.utils.estimator_checks import check_estimator
13
 
14
- from .. import PySRRegressor, install, jl
15
- from ..export_latex import sympy2latex
16
- from ..feature_selection import _handle_feature_selection, run_feature_selection
17
- from ..julia_helpers import init_julia
18
- from ..sr import _check_assertions, _process_constraints, idx_model_selection
19
- from ..utils import _csv_filename_to_pkl_filename
 
 
 
 
 
 
20
  from .params import (
21
  DEFAULT_NCYCLES,
22
  DEFAULT_NITERATIONS,
@@ -24,6 +30,11 @@ from .params import (
24
  DEFAULT_POPULATIONS,
25
  )
26
 
 
 
 
 
 
27
 
28
  class TestPipeline(unittest.TestCase):
29
  def setUp(self):
@@ -171,6 +182,63 @@ class TestPipeline(unittest.TestCase):
171
  self.assertLessEqual(mse1, 1e-4)
172
  self.assertLessEqual(mse2, 1e-4)
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  def test_multioutput_weighted_with_callable_temp_equation(self):
175
  X = self.X.copy()
176
  y = X[:, [0, 1]] ** 2
@@ -308,7 +376,10 @@ class TestPipeline(unittest.TestCase):
308
  "unused_feature": self.rstate.randn(500),
309
  }
310
  )
311
- true_fn = lambda x: np.array(x["T"] + x["x"] ** 2 + 1.323837)
 
 
 
312
  y = true_fn(X)
313
  noise = self.rstate.randn(500) * 0.01
314
  y = y + noise
@@ -367,13 +438,12 @@ class TestPipeline(unittest.TestCase):
367
 
368
  def test_load_model(self):
369
  """See if we can load a ran model from the equation file."""
370
- csv_file_data = """
371
- Complexity,Loss,Equation
372
  1,0.19951081,"1.9762075"
373
  3,0.12717344,"(f0 + 1.4724599)"
374
  4,0.104823045,"pow_abs(2.2683423, cos(f3))\""""
375
  # Strip the indents:
376
- csv_file_data = "\n".join([l.strip() for l in csv_file_data.split("\n")])
377
 
378
  for from_backup in [False, True]:
379
  rand_dir = Path(tempfile.mkdtemp())
@@ -425,12 +495,22 @@ class TestPipeline(unittest.TestCase):
425
  if os.path.exists(file_to_delete):
426
  os.remove(file_to_delete)
427
 
428
- pickle_file = rand_dir / "equations.pkl"
429
  model3 = PySRRegressor.from_file(
430
  model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
431
  )
432
  np.testing.assert_allclose(model.predict(self.X), model3.predict(self.X))
433
 
 
 
 
 
 
 
 
 
 
 
434
 
435
  def manually_create_model(equations, feature_names=None):
436
  if feature_names is None:
@@ -526,7 +606,7 @@ class TestFeatureSelection(unittest.TestCase):
526
  X = self.rstate.randn(20000, 5)
527
  y = X[:, 2] ** 2 + X[:, 3] ** 2
528
  selected = run_feature_selection(X, y, select_k_features=2)
529
- self.assertEqual(sorted(selected), [2, 3])
530
 
531
  def test_feature_selection_handler(self):
532
  X = self.rstate.randn(20000, 5)
@@ -538,8 +618,8 @@ class TestFeatureSelection(unittest.TestCase):
538
  variable_names=var_names,
539
  y=y,
540
  )
541
- self.assertTrue((2 in selection) and (3 in selection))
542
- selected_var_names = [var_names[i] for i in selection]
543
  self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
544
  np.testing.assert_array_equal(
545
  np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
@@ -563,6 +643,105 @@ class TestMiscellaneous(unittest.TestCase):
563
  test_pkl_file = _csv_filename_to_pkl_filename(str(equation_file))
564
  self.assertEqual(test_pkl_file, str(expected_pkl_file))
565
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
  def test_deprecation(self):
567
  """Ensure that deprecation works as expected.
568
 
@@ -705,100 +884,28 @@ class TestMiscellaneous(unittest.TestCase):
705
  model.get_best()
706
  print("Failed", opt["kwargs"])
707
 
708
- def test_pickle_with_temp_equation_file(self):
709
- """If we have a temporary equation file, unpickle the estimator."""
710
- model = PySRRegressor(
711
- populations=int(1 + DEFAULT_POPULATIONS / 5),
712
- temp_equation_file=True,
713
- procs=0,
714
- multithreading=False,
715
  )
716
- nout = 3
717
- X = np.random.randn(100, 2)
718
- y = np.random.randn(100, nout)
719
- model.fit(X, y)
720
- contents = model.equation_file_contents_.copy()
721
 
722
- y_predictions = model.predict(X)
723
-
724
- equation_file_base = model.equation_file_
725
- for i in range(1, nout + 1):
726
- assert not os.path.exists(str(equation_file_base) + f".out{i}.bkup")
727
-
728
- with tempfile.NamedTemporaryFile() as pickle_file:
729
- pkl.dump(model, pickle_file)
730
- pickle_file.seek(0)
731
- model2 = pkl.load(pickle_file)
732
-
733
- contents2 = model2.equation_file_contents_
734
- cols_to_check = ["equation", "loss", "complexity"]
735
- for frame1, frame2 in zip(contents, contents2):
736
- pd.testing.assert_frame_equal(frame1[cols_to_check], frame2[cols_to_check])
737
-
738
- y_predictions2 = model2.predict(X)
739
- np.testing.assert_array_equal(y_predictions, y_predictions2)
740
 
741
- def test_scikit_learn_compatibility(self):
742
- """Test PySRRegressor compatibility with scikit-learn."""
743
- model = PySRRegressor(
744
- niterations=int(1 + DEFAULT_NITERATIONS / 10),
745
- populations=int(1 + DEFAULT_POPULATIONS / 3),
746
- ncycles_per_iteration=int(2 + DEFAULT_NCYCLES / 10),
747
- verbosity=0,
748
- progress=False,
749
- random_state=0,
750
- deterministic=True, # Deterministic as tests require this.
751
- procs=0,
752
- multithreading=False,
753
- warm_start=False,
754
- temp_equation_file=True,
755
- ) # Return early.
756
-
757
- check_generator = check_estimator(model, generate_only=True)
758
- exception_messages = []
759
- for _, check in check_generator:
760
- if check.func.__name__ == "check_complex_data":
761
- # We can use complex data, so avoid this check.
762
- continue
763
- try:
764
- with warnings.catch_warnings():
765
- warnings.simplefilter("ignore")
766
- check(model)
767
- print("Passed", check.func.__name__)
768
- except Exception:
769
- error_message = str(traceback.format_exc())
770
- exception_messages.append(
771
- f"{check.func.__name__}:\n" + error_message + "\n"
772
- )
773
- print("Failed", check.func.__name__, "with:")
774
- # Add a leading tab to error message, which
775
- # might be multi-line:
776
- print("\n".join([(" " * 4) + row for row in error_message.split("\n")]))
777
- # If any checks failed don't let the test pass.
778
- self.assertEqual(len(exception_messages), 0)
779
-
780
- def test_param_groupings(self):
781
- """Test that param_groupings are complete"""
782
- param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
783
- if not param_groupings_file.exists():
784
- return
785
-
786
- # Read the file, discarding lines ending in ":",
787
- # and removing leading "\s*-\s*":
788
- params = []
789
- with open(param_groupings_file, "r") as f:
790
- for line in f.readlines():
791
- if line.strip().endswith(":"):
792
- continue
793
- if line.strip().startswith("-"):
794
- params.append(line.strip()[1:].strip())
795
 
796
- regressor_params = [
797
- p for p in DEFAULT_PARAMS.keys() if p not in ["self", "kwargs"]
798
- ]
799
 
800
- # Check the sets are equal:
801
- self.assertSetEqual(set(params), set(regressor_params))
802
 
803
 
804
  TRUE_PREAMBLE = "\n".join(
@@ -932,7 +1039,7 @@ class TestLaTeXTable(unittest.TestCase):
932
  middle_part_2 = r"""
933
  $y_{1} = x_{1}$ & $1$ & $1.32$ & $0.0$ \\
934
  $y_{1} = \cos{\left(x_{1} \right)}$ & $2$ & $0.0520$ & $3.23$ \\
935
- $y_{1} = x_{0}^{2} x_{1}$ & $5$ & $2.00 \cdot 10^{-15}$ & $10.3$ \\
936
  """
937
  true_latex_table_str = "\n\n".join(
938
  self.create_true_latex(part, include_score=True)
@@ -985,7 +1092,7 @@ class TestLaTeXTable(unittest.TestCase):
985
  middle_part = r"""
986
  $y = x_{0}$ & $1$ & $1.05$ & $0.0$ \\
987
  $y = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ & $3.82$ \\
988
- \begin{minipage}{0.8\linewidth} \vspace{-1em} \begin{dmath*} y = x_{0}^{5} + x_{0}^{3} + 3.20 x_{0} + x_{1}^{3} - 1.20 x_{1} - 5.20 \sin{\left(2.60 x_{0} - 0.326 \sin{\left(x_{2} \right)} \right)} - \cos{\left(x_{0} x_{1} \right)} + \cos{\left(x_{0}^{3} + 3.20 x_{0} + x_{1}^{3} - 1.20 x_{1} + \cos{\left(x_{0} x_{1} \right)} \right)} \end{dmath*} \end{minipage} & $30$ & $1.12 \cdot 10^{-15}$ & $1.09$ \\
989
  """
990
  true_latex_table_str = (
991
  TRUE_PREAMBLE
@@ -1039,8 +1146,14 @@ class TestDimensionalConstraints(unittest.TestCase):
1039
  """This just checks the number of units passed"""
1040
  use_custom_variable_names = False
1041
  variable_names = None
 
1042
  weights = None
1043
- args = (use_custom_variable_names, variable_names, weights)
1044
  valid_units = [
1045
  (np.ones((10, 2)), np.ones(10), ["m/s", "s"], "m"),
1046
  (np.ones((10, 1)), np.ones(10), ["m/s"], None),
@@ -1148,6 +1261,7 @@ def runtests(just_tests=False):
1148
  TestBest,
1149
  TestFeatureSelection,
1150
  TestMiscellaneous,
 
1151
  TestLaTeXTable,
1152
  TestDimensionalConstraints,
1153
  ]
 
11
  import sympy
12
  from sklearn.utils.estimator_checks import check_estimator
13
 
14
+ from pysr import PySRRegressor, install, jl
15
+ from pysr.export_latex import sympy2latex
16
+ from pysr.feature_selection import _handle_feature_selection, run_feature_selection
17
+ from pysr.julia_helpers import init_julia
18
+ from pysr.sr import (
19
+ _check_assertions,
20
+ _process_constraints,
21
+ _suggest_keywords,
22
+ idx_model_selection,
23
+ )
24
+ from pysr.utils import _csv_filename_to_pkl_filename
25
+
26
  from .params import (
27
  DEFAULT_NCYCLES,
28
  DEFAULT_NITERATIONS,
 
30
  DEFAULT_POPULATIONS,
31
  )
32
 
33
+ # Disables local saving:
34
+ os.environ["SYMBOLIC_REGRESSION_IS_TESTING"] = os.environ.get(
35
+ "SYMBOLIC_REGRESSION_IS_TESTING", "true"
36
+ )
37
+
38
 
39
  class TestPipeline(unittest.TestCase):
40
  def setUp(self):
 
182
  self.assertLessEqual(mse1, 1e-4)
183
  self.assertLessEqual(mse2, 1e-4)
184
 
185
+ def test_custom_variable_complexity(self):
186
+ for outer in (True, False):
187
+ for case in (1, 2):
188
+ y = self.X[:, [0, 1]]
189
+ if case == 1:
190
+ kwargs = dict(complexity_of_variables=[2, 3])
191
+ elif case == 2:
192
+ kwargs = dict(complexity_of_variables=2)
193
+
194
+ if outer:
195
+ outer_kwargs = kwargs
196
+ inner_kwargs = dict()
197
+ else:
198
+ outer_kwargs = dict()
199
+ inner_kwargs = kwargs
200
+
201
+ model = PySRRegressor(
202
+ binary_operators=["+"],
203
+ verbosity=0,
204
+ **self.default_test_kwargs,
205
+ early_stop_condition=(
206
+ f"stop_if_{case}(l, c) = l < 1e-8 && c <= {3 if case == 1 else 2}"
207
+ ),
208
+ **outer_kwargs,
209
+ )
210
+ model.fit(self.X[:, [0, 1]], y, **inner_kwargs)
211
+ self.assertLessEqual(model.get_best()[0]["loss"], 1e-8)
212
+ self.assertLessEqual(model.get_best()[1]["loss"], 1e-8)
213
+
214
+ self.assertEqual(model.get_best()[0]["complexity"], 2)
215
+ self.assertEqual(
216
+ model.get_best()[1]["complexity"], 3 if case == 1 else 2
217
+ )
218
+
219
+ def test_error_message_custom_variable_complexity(self):
220
+ X = np.ones((10, 2))
221
+ y = np.ones((10,))
222
+ model = PySRRegressor()
223
+ with self.assertRaises(ValueError) as cm:
224
+ model.fit(X, y, complexity_of_variables=[1, 2, 3])
225
+
226
+ self.assertIn(
227
+ "number of elements in `complexity_of_variables`", str(cm.exception)
228
+ )
229
+
230
+ def test_error_message_both_variable_complexity(self):
231
+ X = np.ones((10, 2))
232
+ y = np.ones((10,))
233
+ model = PySRRegressor(complexity_of_variables=[1, 2])
234
+ with self.assertRaises(ValueError) as cm:
235
+ model.fit(X, y, complexity_of_variables=[1, 2, 3])
236
+
237
+ self.assertIn(
238
+ "You cannot set `complexity_of_variables` at both `fit` and `__init__`.",
239
+ str(cm.exception),
240
+ )
241
+
242
  def test_multioutput_weighted_with_callable_temp_equation(self):
243
  X = self.X.copy()
244
  y = X[:, [0, 1]] ** 2
 
376
  "unused_feature": self.rstate.randn(500),
377
  }
378
  )
379
+
380
+ def true_fn(x):
381
+ return np.array(x["T"] + x["x"] ** 2 + 1.323837)
382
+
383
  y = true_fn(X)
384
  noise = self.rstate.randn(500) * 0.01
385
  y = y + noise
 
438
 
439
  def test_load_model(self):
440
  """See if we can load a ran model from the equation file."""
441
+ csv_file_data = """Complexity,Loss,Equation
 
442
  1,0.19951081,"1.9762075"
443
  3,0.12717344,"(f0 + 1.4724599)"
444
  4,0.104823045,"pow_abs(2.2683423, cos(f3))\""""
445
  # Strip the indents:
446
+ csv_file_data = "\n".join([line.strip() for line in csv_file_data.split("\n")])
447
 
448
  for from_backup in [False, True]:
449
  rand_dir = Path(tempfile.mkdtemp())
 
495
  if os.path.exists(file_to_delete):
496
  os.remove(file_to_delete)
497
 
498
+ # pickle_file = rand_dir / "equations.pkl"
499
  model3 = PySRRegressor.from_file(
500
  model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
501
  )
502
  np.testing.assert_allclose(model.predict(self.X), model3.predict(self.X))
503
 
504
+ def test_jl_function_error(self):
505
+ # TODO: Move this to a better class
506
+ with self.assertRaises(ValueError) as cm:
507
+ PySRRegressor(unary_operators=["1"]).fit([[1]], [1])
508
+
509
+ self.assertIn(
510
+ "When building `unary_operators`, `'1'` did not return a Julia function",
511
+ str(cm.exception),
512
+ )
513
+
514
 
515
  def manually_create_model(equations, feature_names=None):
516
  if feature_names is None:
 
606
  X = self.rstate.randn(20000, 5)
607
  y = X[:, 2] ** 2 + X[:, 3] ** 2
608
  selected = run_feature_selection(X, y, select_k_features=2)
609
+ np.testing.assert_array_equal(selected, [False, False, True, True, False])
610
 
611
  def test_feature_selection_handler(self):
612
  X = self.rstate.randn(20000, 5)
 
618
  variable_names=var_names,
619
  y=y,
620
  )
621
+ np.testing.assert_array_equal(selection, [False, False, True, True, False])
622
+ selected_var_names = [var_names[i] for i in range(5) if selection[i]]
623
  self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
624
  np.testing.assert_array_equal(
625
  np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
 
643
  test_pkl_file = _csv_filename_to_pkl_filename(str(equation_file))
644
  self.assertEqual(test_pkl_file, str(expected_pkl_file))
645
 
646
+ def test_pickle_with_temp_equation_file(self):
647
+ """If we have a temporary equation file, unpickle the estimator."""
648
+ model = PySRRegressor(
649
+ populations=int(1 + DEFAULT_POPULATIONS / 5),
650
+ temp_equation_file=True,
651
+ procs=0,
652
+ multithreading=False,
653
+ )
654
+ nout = 3
655
+ X = np.random.randn(100, 2)
656
+ y = np.random.randn(100, nout)
657
+ model.fit(X, y)
658
+ contents = model.equation_file_contents_.copy()
659
+
660
+ y_predictions = model.predict(X)
661
+
662
+ equation_file_base = model.equation_file_
663
+ for i in range(1, nout + 1):
664
+ assert not os.path.exists(str(equation_file_base) + f".out{i}.bkup")
665
+
666
+ with tempfile.NamedTemporaryFile() as pickle_file:
667
+ pkl.dump(model, pickle_file)
668
+ pickle_file.seek(0)
669
+ model2 = pkl.load(pickle_file)
670
+
671
+ contents2 = model2.equation_file_contents_
672
+ cols_to_check = ["equation", "loss", "complexity"]
673
+ for frame1, frame2 in zip(contents, contents2):
674
+ pd.testing.assert_frame_equal(frame1[cols_to_check], frame2[cols_to_check])
675
+
676
+ y_predictions2 = model2.predict(X)
677
+ np.testing.assert_array_almost_equal(y_predictions, y_predictions2)
678
+
679
+ def test_scikit_learn_compatibility(self):
680
+ """Test PySRRegressor compatibility with scikit-learn."""
681
+ model = PySRRegressor(
682
+ niterations=int(1 + DEFAULT_NITERATIONS / 10),
683
+ populations=int(1 + DEFAULT_POPULATIONS / 3),
684
+ ncycles_per_iteration=int(2 + DEFAULT_NCYCLES / 10),
685
+ verbosity=0,
686
+ progress=False,
687
+ random_state=0,
688
+ deterministic=True, # Deterministic as tests require this.
689
+ procs=0,
690
+ multithreading=False,
691
+ warm_start=False,
692
+ temp_equation_file=True,
693
+ ) # Return early.
694
+
695
+ check_generator = check_estimator(model, generate_only=True)
696
+ exception_messages = []
697
+ for _, check in check_generator:
698
+ if check.func.__name__ == "check_complex_data":
699
+ # We can use complex data, so avoid this check.
700
+ continue
701
+ try:
702
+ with warnings.catch_warnings():
703
+ warnings.simplefilter("ignore")
704
+ check(model)
705
+ print("Passed", check.func.__name__)
706
+ except Exception:
707
+ error_message = str(traceback.format_exc())
708
+ exception_messages.append(
709
+ f"{check.func.__name__}:\n" + error_message + "\n"
710
+ )
711
+ print("Failed", check.func.__name__, "with:")
712
+ # Indent the error message by four spaces, as it
713
+ # might be multi-line:
714
+ print("\n".join([(" " * 4) + row for row in error_message.split("\n")]))
715
+ # If any checks failed don't let the test pass.
716
+ self.assertEqual(len(exception_messages), 0)
717
+
718
+ def test_param_groupings(self):
719
+ """Test that param_groupings are complete"""
720
+ param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
721
+ if not param_groupings_file.exists():
722
+ return
723
+
724
+ # Read the file, discarding lines ending in ":",
725
+ # and removing leading "\s*-\s*":
726
+ params = []
727
+ with open(param_groupings_file, "r") as f:
728
+ for line in f.readlines():
729
+ if line.strip().endswith(":"):
730
+ continue
731
+ if line.strip().startswith("-"):
732
+ params.append(line.strip()[1:].strip())
733
+
734
+ regressor_params = [
735
+ p for p in DEFAULT_PARAMS.keys() if p not in ["self", "kwargs"]
736
+ ]
737
+
738
+ # Check the sets are equal:
739
+ self.assertSetEqual(set(params), set(regressor_params))
740
+
741
+
742
+ class TestHelpMessages(unittest.TestCase):
743
+ """Test user help messages."""
744
+
745
  def test_deprecation(self):
746
  """Ensure that deprecation works as expected.
747
 
 
884
  model.get_best()
885
  print("Failed", opt["kwargs"])
886
 
887
+ def test_suggest_keywords(self):
888
+ # Easy
889
+ self.assertEqual(
890
+ _suggest_keywords(PySRRegressor, "loss_function"), ["loss_function"]
891
  )
892
 
893
+ # More complex, and with error
894
+ with self.assertRaises(TypeError) as cm:
895
+ model = PySRRegressor(ncyclesperiterationn=5)
896
 
897
+ self.assertIn(
898
+ "`ncyclesperiterationn` is not a valid keyword", str(cm.exception)
899
+ )
900
+ self.assertIn("Did you mean", str(cm.exception))
901
+ self.assertIn("`ncycles_per_iteration`, ", str(cm.exception))
902
+ self.assertIn("`niterations`", str(cm.exception))
903
 
904
+ # More distant matches (this might need to be changed)
905
+ with self.assertRaises(TypeError) as cm:
906
+ model = PySRRegressor(operators=["+", "-"])
907
 
908
+ self.assertIn("`unary_operators`, `binary_operators`", str(cm.exception))
 
909
 
910
 
911
  TRUE_PREAMBLE = "\n".join(
 
1039
  middle_part_2 = r"""
1040
  $y_{1} = x_{1}$ & $1$ & $1.32$ & $0.0$ \\
1041
  $y_{1} = \cos{\left(x_{1} \right)}$ & $2$ & $0.0520$ & $3.23$ \\
1042
+ $y_{1} = x_{0} x_{0} x_{1}$ & $5$ & $2.00 \cdot 10^{-15}$ & $10.3$ \\
1043
  """
1044
  true_latex_table_str = "\n\n".join(
1045
  self.create_true_latex(part, include_score=True)
 
1092
  middle_part = r"""
1093
  $y = x_{0}$ & $1$ & $1.05$ & $0.0$ \\
1094
  $y = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ & $3.82$ \\
1095
+ \begin{minipage}{0.8\linewidth} \vspace{-1em} \begin{dmath*} y = x_{0} x_{0} x_{0} + x_{0} x_{0} x_{0} x_{0} x_{0} + 3.20 x_{0} - 1.20 x_{1} + x_{1} x_{1} x_{1} + 5.20 \sin{\left(- 2.60 x_{0} + 0.326 \sin{\left(x_{2} \right)} \right)} - \cos{\left(x_{0} x_{1} \right)} + \cos{\left(x_{0} x_{0} x_{0} + 3.20 x_{0} - 1.20 x_{1} + x_{1} x_{1} x_{1} + \cos{\left(x_{0} x_{1} \right)} \right)} \end{dmath*} \end{minipage} & $30$ & $1.12 \cdot 10^{-15}$ & $1.09$ \\
1096
  """
1097
  true_latex_table_str = (
1098
  TRUE_PREAMBLE
 
1146
  """This just checks the number of units passed"""
1147
  use_custom_variable_names = False
1148
  variable_names = None
1149
+ complexity_of_variables = 1
1150
  weights = None
1151
+ args = (
1152
+ use_custom_variable_names,
1153
+ variable_names,
1154
+ complexity_of_variables,
1155
+ weights,
1156
+ )
1157
  valid_units = [
1158
  (np.ones((10, 2)), np.ones(10), ["m/s", "s"], "m"),
1159
  (np.ones((10, 1)), np.ones(10), ["m/s"], None),
 
1261
  TestBest,
1262
  TestFeatureSelection,
1263
  TestMiscellaneous,
1264
+ TestHelpMessages,
1265
  TestLaTeXTable,
1266
  TestDimensionalConstraints,
1267
  ]
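
For reference, the `complexity_of_variables` behavior exercised by the new tests above can be used as in the following minimal sketch. This is illustrative, not part of the diff; the data and operator choices are made up.

import numpy as np
from pysr import PySRRegressor

X = np.random.randn(100, 2)
y = X[:, 0] + X[:, 1]

# Give x0 a complexity of 2 and x1 a complexity of 3
# (a single scalar would apply one complexity to every feature).
model = PySRRegressor(binary_operators=["+"], complexity_of_variables=[2, 3])

# fit() accepts the same keyword, but -- as
# test_error_message_both_variable_complexity checks -- passing it
# both here and in __init__ raises a ValueError.
model.fit(X, y)
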
pysr/test/test_jax.py CHANGED
@@ -5,27 +5,29 @@ import numpy as np
5
  import pandas as pd
6
  import sympy
7
 
8
- from .. import PySRRegressor, sympy2jax
 
9
 
10
 
11
  class TestJAX(unittest.TestCase):
12
  def setUp(self):
13
  np.random.seed(0)
14
 
15
  def test_sympy2jax(self):
16
- from jax import numpy as jnp
17
  from jax import random
18
 
19
  x, y, z = sympy.symbols("x y z")
20
  cosx = 1.0 * sympy.cos(x) + y
21
  key = random.PRNGKey(0)
22
  X = random.normal(key, (1000, 2))
23
- true = 1.0 * jnp.cos(X[:, 0]) + X[:, 1]
24
  f, params = sympy2jax(cosx, [x, y, z])
25
- self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
26
 
27
  def test_pipeline_pandas(self):
28
- from jax import numpy as jnp
29
 
30
  X = pd.DataFrame(np.random.randn(100, 10))
31
  y = np.ones(X.shape[0])
@@ -52,14 +54,12 @@ class TestJAX(unittest.TestCase):
52
  jformat = model.jax()
53
 
54
  np.testing.assert_almost_equal(
55
- np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
56
  np.square(np.cos(X.values[:, 1])), # Select feature 1
57
  decimal=3,
58
  )
59
 
60
  def test_pipeline(self):
61
- from jax import numpy as jnp
62
-
63
  X = np.random.randn(100, 10)
64
  y = np.ones(X.shape[0])
65
  model = PySRRegressor(progress=False, max_evals=10000, output_jax_format=True)
@@ -81,15 +81,46 @@ class TestJAX(unittest.TestCase):
81
  jformat = model.jax()
82
 
83
  np.testing.assert_almost_equal(
84
- np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
85
  np.square(np.cos(X[:, 1])), # Select feature 1
86
  decimal=3,
87
  )
88

89
  def test_feature_selection_custom_operators(self):
90
  rstate = np.random.RandomState(0)
91
  X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
92
- cos_approx = lambda x: 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
93
  y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
94
 
95
  model = PySRRegressor(
 
5
  import pandas as pd
6
  import sympy
7
 
8
+ import pysr
9
+ from pysr import PySRRegressor, sympy2jax
10
 
11
 
12
  class TestJAX(unittest.TestCase):
13
  def setUp(self):
14
  np.random.seed(0)
15
+ from jax import numpy as jnp
16
+
17
+ self.jnp = jnp
18
 
19
  def test_sympy2jax(self):
 
20
  from jax import random
21
 
22
  x, y, z = sympy.symbols("x y z")
23
  cosx = 1.0 * sympy.cos(x) + y
24
  key = random.PRNGKey(0)
25
  X = random.normal(key, (1000, 2))
26
+ true = 1.0 * self.jnp.cos(X[:, 0]) + X[:, 1]
27
  f, params = sympy2jax(cosx, [x, y, z])
28
+ self.assertTrue(self.jnp.all(self.jnp.isclose(f(X, params), true)).item())
29
 
30
  def test_pipeline_pandas(self):
 
31
 
32
  X = pd.DataFrame(np.random.randn(100, 10))
33
  y = np.ones(X.shape[0])
 
54
  jformat = model.jax()
55
 
56
  np.testing.assert_almost_equal(
57
+ np.array(jformat["callable"](self.jnp.array(X), jformat["parameters"])),
58
  np.square(np.cos(X.values[:, 1])), # Select feature 1
59
  decimal=3,
60
  )
61
 
62
  def test_pipeline(self):
63
  X = np.random.randn(100, 10)
64
  y = np.ones(X.shape[0])
65
  model = PySRRegressor(progress=False, max_evals=10000, output_jax_format=True)
 
81
  jformat = model.jax()
82
 
83
  np.testing.assert_almost_equal(
84
+ np.array(jformat["callable"](self.jnp.array(X), jformat["parameters"])),
85
  np.square(np.cos(X[:, 1])), # Select feature 1
86
  decimal=3,
87
  )
88
 
89
+ def test_avoid_simplification(self):
90
+ ex = pysr.export_sympy.pysr2sympy(
91
+ "square(exp(sign(0.44796443))) + 1.5 * x1",
92
+ feature_names_in=["x1"],
93
+ extra_sympy_mappings={"square": lambda x: x**2},
94
+ )
95
+ f, params = pysr.export_jax.sympy2jax(ex, [sympy.symbols("x1")])
96
+ key = np.random.RandomState(0)
97
+ X = key.randn(10, 1)
98
+ np.testing.assert_almost_equal(
99
+ np.array(f(self.jnp.array(X), params)),
100
+ np.square(np.exp(np.sign(0.44796443))) + 1.5 * X[:, 0],
101
+ decimal=3,
102
+ )
103
+
104
+ def test_issue_656(self):
105
+ import sympy
106
+
107
+ E_plus_x1 = sympy.exp(1) + sympy.symbols("x1")
108
+ f, params = pysr.export_jax.sympy2jax(E_plus_x1, [sympy.symbols("x1")])
109
+ key = np.random.RandomState(0)
110
+ X = key.randn(10, 1)
111
+ np.testing.assert_almost_equal(
112
+ np.array(f(self.jnp.array(X), params)),
113
+ np.exp(1) + X[:, 0],
114
+ decimal=3,
115
+ )
116
+
117
  def test_feature_selection_custom_operators(self):
118
  rstate = np.random.RandomState(0)
119
  X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
120
+
121
+ def cos_approx(x):
122
+ return 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
123
+
124
  y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
125
 
126
  model = PySRRegressor(
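
For reference, the `sympy2jax` round-trip exercised by `test_sympy2jax` above boils down to the following sketch (illustrative, not part of the diff):

import sympy
from jax import numpy as jnp
from jax import random
from pysr import sympy2jax

x, y = sympy.symbols("x y")
expr = 1.0 * sympy.cos(x) + y

# sympy2jax returns a JAX-traceable callable plus an array holding
# the expression's numeric constants.
f, params = sympy2jax(expr, [x, y])

# Columns of X map to the symbol list in order: X[:, 0] -> x, X[:, 1] -> y.
X = random.normal(random.PRNGKey(0), (1000, 2))
assert jnp.allclose(f(X, params), 1.0 * jnp.cos(X[:, 0]) + X[:, 1])
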
pysr/test/test_startup.py CHANGED
@@ -9,8 +9,9 @@ from pathlib import Path
9
 
10
  import numpy as np
11
 
12
- from .. import PySRRegressor
13
- from ..julia_import import jl_version
 
14
  from .params import DEFAULT_NITERATIONS, DEFAULT_POPULATIONS
15
 
16
 
@@ -118,10 +119,6 @@ class TestStartup(unittest.TestCase):
118
  code="import juliacall; import pysr",
119
  msg="juliacall module already imported.",
120
  ),
121
- dict(
122
- code='import os; os.environ["PYSR_AUTOLOAD_EXTENSIONS"] = "foo"; import pysr',
123
- msg="PYSR_AUTOLOAD_EXTENSIONS environment variable is set",
124
- ),
125
  ]
126
  for warning_test in warning_tests:
127
  result = subprocess.run(
 
9
 
10
  import numpy as np
11
 
12
+ from pysr import PySRRegressor
13
+ from pysr.julia_import import jl_version
14
+
15
  from .params import DEFAULT_NITERATIONS, DEFAULT_POPULATIONS
16
 
17
 
 
119
  code="import juliacall; import pysr",
120
  msg="juliacall module already imported.",
121
  ),
122
  ]
123
  for warning_test in warning_tests:
124
  result = subprocess.run(
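
The warning tests above share one pattern: run a snippet in a fresh interpreter and assert on its output. A minimal sketch of the retained `juliacall` case, assuming the warning lands on stderr as Python warnings do by default:

import subprocess
import sys

# Per the warning_tests entry above, importing juliacall before
# pysr should emit a warning in the fresh interpreter.
result = subprocess.run(
    [sys.executable, "-c", "import juliacall; import pysr"],
    capture_output=True,
    text=True,
)
assert "juliacall module already imported" in result.stderr
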
pysr/test/test_torch.py CHANGED
@@ -4,7 +4,8 @@ import numpy as np
4
  import pandas as pd
5
  import sympy
6
 
7
- from .. import PySRRegressor, sympy2torch
 
8
 
9
 
10
  class TestTorch(unittest.TestCase):
@@ -153,10 +154,43 @@ class TestTorch(unittest.TestCase):
153
  decimal=3,
154
  )
155

156
  def test_feature_selection_custom_operators(self):
157
  rstate = np.random.RandomState(0)
158
  X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
159
- cos_approx = lambda x: 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
160
  y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
161
 
162
  model = PySRRegressor(
 
4
  import pandas as pd
5
  import sympy
6
 
7
+ import pysr
8
+ from pysr import PySRRegressor, sympy2torch
9
 
10
 
11
  class TestTorch(unittest.TestCase):
 
154
  decimal=3,
155
  )
156
 
157
+ def test_avoid_simplification(self):
158
+ # SymPy should not simplify without permission
159
+ torch = self.torch
160
+ ex = pysr.export_sympy.pysr2sympy(
161
+ "square(exp(sign(0.44796443))) + 1.5 * x1",
162
+ # ^ Normally this would become exp1 and require
163
+ # its own mapping
164
+ feature_names_in=["x1"],
165
+ extra_sympy_mappings={"square": lambda x: x**2},
166
+ )
167
+ m = pysr.export_torch.sympy2torch(ex, ["x1"])
168
+ rng = np.random.RandomState(0)
169
+ X = rng.randn(10, 1)
170
+ np.testing.assert_almost_equal(
171
+ m(torch.tensor(X)).detach().numpy(),
172
+ np.square(np.exp(np.sign(0.44796443))) + 1.5 * X[:, 0],
173
+ decimal=3,
174
+ )
175
+
176
+ def test_issue_656(self):
177
+ # Should correctly map numeric symbols to floats
178
+ E_plus_x1 = sympy.exp(1) + sympy.symbols("x1")
179
+ m = pysr.export_torch.sympy2torch(E_plus_x1, ["x1"])
180
+ X = np.random.randn(10, 1)
181
+ np.testing.assert_almost_equal(
182
+ m(self.torch.tensor(X)).detach().numpy(),
183
+ np.exp(1) + X[:, 0],
184
+ decimal=3,
185
+ )
186
+
187
  def test_feature_selection_custom_operators(self):
188
  rstate = np.random.RandomState(0)
189
  X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
190
+
191
+ def cos_approx(x):
192
+ return 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
193
+
194
  y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
195
 
196
  model = PySRRegressor(
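
For reference, the numeric-symbol handling checked by `test_issue_656` above can be reproduced in isolation (a minimal sketch, not part of the diff):

import numpy as np
import sympy
import torch
from pysr import sympy2torch

x1 = sympy.symbols("x1")
expr = sympy.exp(1) + x1  # sympy.exp(1) is the constant E

# sympy2torch should map the numeric symbol E to a plain float.
module = sympy2torch(expr, ["x1"])
X = np.random.randn(10, 1)
out = module(torch.tensor(X)).detach().numpy()
np.testing.assert_almost_equal(out, np.exp(1) + X[:, 0], decimal=3)
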
pysr/utils.py CHANGED
@@ -1,10 +1,20 @@
1
  import os
2
  import re
3
 
4
- from sklearn.utils.validation import _check_feature_names_in
5

6
 
7
- def _csv_filename_to_pkl_filename(csv_filename: str) -> str:
8
  if os.path.splitext(csv_filename)[1] == ".pkl":
9
  return csv_filename
10
 
@@ -53,3 +63,13 @@ def _subscriptify(i: int) -> str:
53
  For example, 123 -> "₁₂₃".
54
  """
55
  return "".join([chr(0x2080 + int(c)) for c in str(i)])

1
+ import difflib
2
+ import inspect
3
  import os
4
  import re
5
+ from pathlib import Path
6
+ from typing import Any, List, TypeVar, Union
7
 
8
+ from numpy import ndarray
9
+ from sklearn.utils.validation import _check_feature_names_in # type: ignore
10
 
11
+ T = TypeVar("T", bound=Any)
12
 
13
+ ArrayLike = Union[ndarray, List[T]]
14
+ PathLike = Union[str, Path]
15
+
16
+
17
+ def _csv_filename_to_pkl_filename(csv_filename: PathLike) -> PathLike:
18
  if os.path.splitext(csv_filename)[1] == ".pkl":
19
  return csv_filename
20
 
 
63
  For example, 123 -> "₁₂₃".
64
  """
65
  return "".join([chr(0x2080 + int(c)) for c in str(i)])
66
+
67
+
68
+ def _suggest_keywords(cls, k: str) -> List[str]:
69
+ valid_keywords = [
70
+ param
71
+ for param in inspect.signature(cls.__init__).parameters
72
+ if param not in ["self", "kwargs"]
73
+ ]
74
+ suggestions = difflib.get_close_matches(k, valid_keywords, n=3)
75
+ return suggestions
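
The new `_suggest_keywords` helper is a thin wrapper over difflib, so its suggestions can be previewed directly. A sketch with an abridged keyword list:

import difflib

valid_keywords = ["niterations", "ncycles_per_iteration", "binary_operators"]

# The same call _suggest_keywords makes: up to three close
# matches, ranked by similarity.
print(difflib.get_close_matches("ncyclesperiterationn", valid_keywords, n=3))
# e.g. ['ncycles_per_iteration', 'niterations']
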
requirements.txt CHANGED
@@ -1,8 +1,7 @@
1
  sympy>=1.0.0,<2.0.0
2
  pandas>=0.21.0,<3.0.0
3
- numpy>=1.13.0,<2.0.0
4
  scikit_learn>=1.0.0,<2.0.0
5
- juliacall==0.9.19
6
  click>=7.0.0,<9.0.0
7
  setuptools>=50.0.0
8
- typing_extensions>=4.0.0,<5.0.0; python_version < "3.8"
 
1
  sympy>=1.0.0,<2.0.0
2
  pandas>=0.21.0,<3.0.0
3
+ numpy>=1.13.0,<3.0.0
4
  scikit_learn>=1.0.0,<2.0.0
5
+ juliacall==0.9.20
6
  click>=7.0.0,<9.0.0
7
  setuptools>=50.0.0