MilesCranmer committed on
Commit
8c81191
·
unverified ·
2 Parent(s): edab547 c4e5886

Merge pull request #535 from MilesCranmer/pythoncall-try-3

Browse files

(BREAKING) Rewrite Julia interface with PyJulia -> JuliaCall; other changes

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .coveragerc +4 -0
  2. .github/workflows/CI.yml +40 -23
  3. .github/workflows/CI_Windows.yml +5 -8
  4. .github/workflows/CI_conda_forge.yml +3 -1
  5. .github/workflows/CI_docker.yml +1 -1
  6. .github/workflows/CI_docker_large_nightly.yml +1 -1
  7. .github/workflows/CI_large_nightly.yml +7 -9
  8. .github/workflows/CI_mac.yml +6 -9
  9. .pre-commit-config.yaml +2 -0
  10. CONTRIBUTORS.md +1 -1
  11. Dockerfile +6 -5
  12. README.md +12 -38
  13. TODO.md +0 -142
  14. datasets/FeynmanEquations.csv +0 -101
  15. docs/backend.md +61 -15
  16. docs/examples.md +10 -12
  17. docs/options.md +9 -9
  18. docs/tuning.md +2 -2
  19. environment.yml +8 -9
  20. example.py +1 -1
  21. examples/pysr_demo.ipynb +60 -109
  22. pyproject.toml +29 -0
  23. pysr/.gitignore +1 -0
  24. pysr/__init__.py +9 -12
  25. pysr/__main__.py +1 -1
  26. pysr/_cli/main.py +57 -7
  27. pysr/deprecated.py +46 -35
  28. pysr/feynman_problems.py +0 -176
  29. pysr/julia_helpers.py +27 -326
  30. pysr/julia_import.py +76 -0
  31. pysr/juliapkg.json +21 -0
  32. pysr/param_groupings.yml +3 -5
  33. pysr/sklearn_monkeypatch.py +1 -1
  34. pysr/sr.py +120 -116
  35. pysr/test/__init__.py +11 -3
  36. pysr/test/__main__.py +1 -31
  37. pysr/test/generate_dev_juliapkg.py +17 -0
  38. pysr/test/incremental_install_simulator.dockerfile +0 -52
  39. pysr/test/nb_sanitize.cfg +3 -0
  40. pysr/test/params.py +8 -0
  41. pysr/test/test.py +102 -40
  42. pysr/test/test_cli.py +79 -55
  43. pysr/test/test_dev.py +59 -0
  44. pysr/test/test_dev_pysr.dockerfile +57 -0
  45. pysr/test/test_env.py +0 -58
  46. pysr/test/test_jax.py +6 -2
  47. pysr/test/test_nb.ipynb +536 -0
  48. pysr/test/test_startup.py +164 -0
  49. pysr/test/test_torch.py +21 -35
  50. pysr/version.py +0 -2
.coveragerc ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [run]
2
+ omit =
3
+ */test/*
4
+ source = pysr
.github/workflows/CI.yml CHANGED
@@ -24,6 +24,8 @@ jobs:
24
  test:
25
  runs-on: ${{ matrix.os }}
26
  timeout-minutes: 60
 
 
27
  defaults:
28
  run:
29
  shell: bash
@@ -38,6 +40,10 @@ jobs:
38
  python-version: '3.7'
39
  os: ubuntu-latest
40
  test-id: include
 
 
 
 
41
 
42
  steps:
43
  - uses: actions/checkout@v4
@@ -58,29 +64,29 @@ jobs:
58
  - name: "Install PySR"
59
  run: |
60
  python -m pip install --upgrade pip
61
- pip install -r requirements.txt
62
- python setup.py install
63
- python -m pysr install
64
- - name: "Install Coverage tool"
65
- run: pip install coverage coveralls
 
 
 
 
66
  - name: "Run tests"
67
- run: |
68
- coverage run --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test main
69
- coverage run --append --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test cli
70
  - name: "Install JAX"
71
  run: pip install jax jaxlib # (optional import)
72
  if: ${{ matrix.test-id == 'main' }}
73
  - name: "Run JAX tests"
74
- run: coverage run --append --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test jax
75
  if: ${{ matrix.test-id == 'main' }}
76
  - name: "Install Torch"
77
  run: pip install torch # (optional import)
78
  if: ${{ matrix.test-id == 'main' }}
79
  - name: "Run Torch tests"
80
- run: coverage run --append --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test torch
81
  if: ${{ matrix.test-id == 'main' }}
82
- - name: "Run custom env tests"
83
- run: coverage run --append --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test env
84
  - name: "Coveralls"
85
  env:
86
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -88,14 +94,26 @@ jobs:
88
  COVERALLS_PARALLEL: true
89
  run: coveralls --service=github
90
 
91
- incremental_install:
92
- runs-on: ubuntu-latest
 
 
 
 
 
 
 
 
 
93
  steps:
94
  - uses: actions/checkout@v4
95
- - name: "Build incremental install"
96
- run: docker build -t pysr -f pysr/test/incremental_install_simulator.dockerfile .
97
- - name: "Test incremental install"
98
- run: docker run --rm pysr /bin/bash -l -c 'python3 -m pysr.test main && python3 -m pysr.test env'
 
 
 
99
 
100
  conda_test:
101
  runs-on: ${{ matrix.os }}
@@ -133,9 +151,9 @@ jobs:
133
  - name: "Install PySR"
134
  run: |
135
  python3 -m pip install .
136
- python3 -m pysr install
137
  - name: "Run tests"
138
- run: cd /tmp && python -m pysr.test main
139
 
140
  coveralls:
141
  name: Indicate completion to coveralls.io
@@ -177,9 +195,8 @@ jobs:
177
  - name: "Install PySR and all dependencies"
178
  run: |
179
  python -m pip install --upgrade pip
180
- python -m pip install -r requirements.txt
181
- python -m pip install mypy
182
- python -m pip install .
183
  - name: "Install additional dependencies"
184
  run: python -m pip install jax jaxlib torch
185
  if: ${{ matrix.python-version != '3.7' }}
 
24
  test:
25
  runs-on: ${{ matrix.os }}
26
  timeout-minutes: 60
27
+ env:
28
+ COVERAGE_PROCESS_START: "${{ github.workspace }}/.coveragerc"
29
  defaults:
30
  run:
31
  shell: bash
 
40
  python-version: '3.7'
41
  os: ubuntu-latest
42
  test-id: include
43
+ - julia-version: '1'
44
+ python-version: '3.12'
45
+ os: ubuntu-latest
46
+ test-id: include
47
 
48
  steps:
49
  - uses: actions/checkout@v4
 
64
  - name: "Install PySR"
65
  run: |
66
  python -m pip install --upgrade pip
67
+ pip install .
68
+ python -c 'import pysr'
69
+ - name: "Assert Julia version"
70
+ if: ${{ matrix.julia-version != '1'}}
71
+ run: python3 -c "from pysr import jl; assert jl.VERSION.major == jl.seval('v\"${{ matrix.julia-version }}\"').major; assert jl.VERSION.minor == jl.seval('v\"${{ matrix.julia-version }}\"').minor"
72
+ - name: "Install test dependencies"
73
+ run: pip install coverage coveralls pytest nbval
74
+ - name: "Set up coverage for subprocesses"
75
+ run: echo 'import coverage; coverage.process_startup()' > "${{ github.workspace }}/sitecustomize.py"
76
  - name: "Run tests"
77
+ run: coverage run -m pysr test main,cli,startup
 
 
78
  - name: "Install JAX"
79
  run: pip install jax jaxlib # (optional import)
80
  if: ${{ matrix.test-id == 'main' }}
81
  - name: "Run JAX tests"
82
+ run: coverage run --append -m pysr test jax
83
  if: ${{ matrix.test-id == 'main' }}
84
  - name: "Install Torch"
85
  run: pip install torch # (optional import)
86
  if: ${{ matrix.test-id == 'main' }}
87
  - name: "Run Torch tests"
88
+ run: coverage run --append -m pysr test torch
89
  if: ${{ matrix.test-id == 'main' }}
 
 
90
  - name: "Coveralls"
91
  env:
92
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
94
  COVERALLS_PARALLEL: true
95
  run: coveralls --service=github
96
 
97
+ dev_install:
98
+ runs-on: ${{ matrix.os }}
99
+ strategy:
100
+ matrix:
101
+ os: ['ubuntu-latest']
102
+ python-version: ['3.11']
103
+ julia-version: ['1']
104
+ include:
105
+ - os: ubuntu-latest
106
+ python-version: '3.7'
107
+ julia-version: '1.6'
108
  steps:
109
  - uses: actions/checkout@v4
110
+ - uses: actions/setup-python@v5
111
+ - name: "Install PySR"
112
+ run: |
113
+ python -m pip install --upgrade pip
114
+ pip install .
115
+ - name: "Run development test"
116
+ run: PYSR_TEST_JULIA_VERSION=${{ matrix.julia-version }} PYSR_TEST_PYTHON_VERSION=${{ matrix.python-version }} python -m pysr test dev
117
 
118
  conda_test:
119
  runs-on: ${{ matrix.os }}
 
151
  - name: "Install PySR"
152
  run: |
153
  python3 -m pip install .
154
+ python3 -c 'import pysr'
155
  - name: "Run tests"
156
+ run: cd /tmp && python -m pysr test main
157
 
158
  coveralls:
159
  name: Indicate completion to coveralls.io
 
195
  - name: "Install PySR and all dependencies"
196
  run: |
197
  python -m pip install --upgrade pip
198
+ pip install .
199
+ pip install mypy
 
200
  - name: "Install additional dependencies"
201
  run: python -m pip install jax jaxlib torch
202
  if: ${{ matrix.python-version != '3.7' }}
.github/workflows/CI_Windows.yml CHANGED
@@ -52,16 +52,13 @@ jobs:
52
  - name: "Install PySR"
53
  run: |
54
  python -m pip install --upgrade pip
55
- pip install -r requirements.txt
56
- python setup.py install
57
- python -m pysr install
58
  - name: "Run tests"
59
  run: |
60
- python -m pysr.test main
61
- python -m pysr.test cli
62
  - name: "Install Torch"
63
  run: pip install torch # (optional import)
64
  - name: "Run Torch tests"
65
- run: python -m pysr.test torch
66
- - name: "Run custom env tests"
67
- run: python -m pysr.test env
 
52
  - name: "Install PySR"
53
  run: |
54
  python -m pip install --upgrade pip
55
+ pip install pytest nbval
56
+ pip install .
57
+ python -c 'import pysr'
58
  - name: "Run tests"
59
  run: |
60
+ python -m pysr test main,cli,startup
 
61
  - name: "Install Torch"
62
  run: pip install torch # (optional import)
63
  - name: "Run Torch tests"
64
+ run: python -m pysr test torch
 
 
.github/workflows/CI_conda_forge.yml CHANGED
@@ -40,4 +40,6 @@ jobs:
40
  run: conda activate pysr-test && conda install pysr
41
  if: ${{ !matrix.use-mamba }}
42
  - name: "Run tests"
43
- run: python -m pysr.test main
 
 
 
40
  run: conda activate pysr-test && conda install pysr
41
  if: ${{ !matrix.use-mamba }}
42
  - name: "Run tests"
43
+ run: |
44
+ pip install pytest nbval
45
+ python -m pysr test main,startup
.github/workflows/CI_docker.yml CHANGED
@@ -37,4 +37,4 @@ jobs:
37
  - name: Build docker
38
  run: docker build --platform=${{ matrix.arch }} -t pysr .
39
  - name: Test docker
40
- run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'python3 -m pysr.test main && python3 -m pysr.test cli && python3 -m pysr.test env'
 
37
  - name: Build docker
38
  run: docker build --platform=${{ matrix.arch }} -t pysr .
39
  - name: Test docker
40
+ run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'pip install pytest nbval && python3 -m pysr test main,cli,startup'
.github/workflows/CI_docker_large_nightly.yml CHANGED
@@ -33,4 +33,4 @@ jobs:
33
  - name: Build docker
34
  run: docker build --platform=${{ matrix.arch }} -t pysr --build-arg JLVERSION=${{ matrix.julia-version }} --build-arg PYVERSION=${{ matrix.python-version }} .
35
  - name: Test docker
36
- run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'python3 -m pysr.test main && python3 -m pysr.test cli && python3 -m pysr.test env'
 
33
  - name: Build docker
34
  run: docker build --platform=${{ matrix.arch }} -t pysr --build-arg JLVERSION=${{ matrix.julia-version }} --build-arg PYVERSION=${{ matrix.python-version }} .
35
  - name: Test docker
36
+ run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'pip install pytest nbval && python3 -m pysr test main,cli,startup'
.github/workflows/CI_large_nightly.yml CHANGED
@@ -40,13 +40,11 @@ jobs:
40
  - name: "Install PySR"
41
  run: |
42
  python -m pip install --upgrade pip
43
- pip install -r requirements.txt
44
- python setup.py install
45
- python -m pysr install
 
 
 
46
  - name: "Run tests"
47
- run: |
48
- python -m pysr.test main
49
- python -m pysr.test cli
50
- - name: "Run new env test"
51
- run: python -m pysr.test env
52
- if: ${{ !(matrix.os == 'windows-latest' && matrix.python-version == '3.7') }}
 
40
  - name: "Install PySR"
41
  run: |
42
  python -m pip install --upgrade pip
43
+ pip install pytest nbval
44
+ pip install .
45
+ python -c 'import pysr'
46
+ - name: "Assert Julia version"
47
+ if: ${{ matrix.julia-version != '1'}}
48
+ run: python3 -c "from pysr import jl; assert jl.VERSION.major == jl.seval('v\"${{ matrix.julia-version }}\"').major; assert jl.VERSION.minor == jl.seval('v\"${{ matrix.julia-version }}\"').minor"
49
  - name: "Run tests"
50
+ run: python -m pysr test main,cli,startup
 
 
 
 
 
.github/workflows/CI_mac.yml CHANGED
@@ -52,20 +52,17 @@ jobs:
52
  - name: "Install PySR"
53
  run: |
54
  python -m pip install --upgrade pip
55
- pip install -r requirements.txt
56
- python setup.py install
57
- python -m pysr install
58
  - name: "Run tests"
59
  run: |
60
- python -m pysr.test main
61
- python -m pysr.test cli
62
  - name: "Install JAX"
63
  run: pip install jax jaxlib # (optional import)
64
  - name: "Run JAX tests"
65
- run: python -m pysr.test jax
66
  - name: "Install Torch"
67
  run: pip install torch # (optional import)
68
  - name: "Run Torch tests"
69
- run: python -m pysr.test torch
70
- - name: "Run custom env tests"
71
- run: python -m pysr.test env
 
52
  - name: "Install PySR"
53
  run: |
54
  python -m pip install --upgrade pip
55
+ pip install pytest nbval
56
+ pip install .
57
+ python -c 'import pysr'
58
  - name: "Run tests"
59
  run: |
60
+ python -m pysr test main,cli,startup
 
61
  - name: "Install JAX"
62
  run: pip install jax jaxlib # (optional import)
63
  - name: "Run JAX tests"
64
+ run: python -m pysr test jax
65
  - name: "Install Torch"
66
  run: pip install torch # (optional import)
67
  - name: "Run Torch tests"
68
+ run: python -m pysr test torch
 
 
.pre-commit-config.yaml CHANGED
@@ -13,11 +13,13 @@ repos:
13
  hooks:
14
  - id: black
15
  - id: black-jupyter
 
16
  # Stripping notebooks
17
  - repo: https://github.com/kynan/nbstripout
18
  rev: 0.6.1
19
  hooks:
20
  - id: nbstripout
 
21
  # Unused imports
22
  - repo: https://github.com/hadialqattan/pycln
23
  rev: "v2.4.0"
 
13
  hooks:
14
  - id: black
15
  - id: black-jupyter
16
+ exclude: pysr/test/test_nb.ipynb
17
  # Stripping notebooks
18
  - repo: https://github.com/kynan/nbstripout
19
  rev: 0.6.1
20
  hooks:
21
  - id: nbstripout
22
+ exclude: pysr/test/test_nb.ipynb
23
  # Unused imports
24
  - repo: https://github.com/hadialqattan/pycln
25
  rev: "v2.4.0"
CONTRIBUTORS.md CHANGED
@@ -42,7 +42,7 @@ Scan through our [existing issues](https://github.com/MilesCranmer/PySR/issues)
42
  check out the [guide](https://astroautomata.com/PySR/backend/) on modifying a custom SymbolicRegression.jl library.
43
  In this case, you might instead be interested in making suggestions to the [SymbolicRegression.jl](http://github.com/MilesCranmer/SymbolicRegression.jl) library.
44
 
45
- 4. You can install your local version of PySR with `python setup.py install`, and run tests with `python -m pysr.test main`.
46
 
47
  ### Commit your update
48
 
 
42
  check out the [guide](https://astroautomata.com/PySR/backend/) on modifying a custom SymbolicRegression.jl library.
43
  In this case, you might instead be interested in making suggestions to the [SymbolicRegression.jl](http://github.com/MilesCranmer/SymbolicRegression.jl) library.
44
 
45
+ 4. You can install your local version of PySR with `python setup.py install`, and run tests with `python -m pysr test main`.
46
 
47
  ### Commit your update
48
 
Dockerfile CHANGED
@@ -13,22 +13,23 @@ COPY --from=jl /usr/local/julia /usr/local/julia
13
  ENV PATH="/usr/local/julia/bin:${PATH}"
14
 
15
  # Install IPython and other useful libraries:
16
- RUN pip install ipython matplotlib
17
 
18
  WORKDIR /pysr
19
 
20
  # Caches install (https://stackoverflow.com/questions/25305788/how-to-avoid-reinstalling-packages-when-building-docker-image-for-python-project)
21
  ADD ./requirements.txt /pysr/requirements.txt
22
- RUN pip3 install -r /pysr/requirements.txt
23
 
24
  # Install PySR:
25
  # We do a minimal copy so it doesn't need to rerun at every file change:
 
26
  ADD ./setup.py /pysr/setup.py
27
- ADD ./pysr/ /pysr/pysr/
28
- RUN pip3 install .
29
 
30
  # Install Julia pre-requisites:
31
- RUN python3 -m pysr install
32
 
33
  # metainformation
34
  LABEL org.opencontainers.image.authors = "Miles Cranmer"
 
13
  ENV PATH="/usr/local/julia/bin:${PATH}"
14
 
15
  # Install IPython and other useful libraries:
16
+ RUN pip install --no-cache-dir ipython matplotlib
17
 
18
  WORKDIR /pysr
19
 
20
  # Caches install (https://stackoverflow.com/questions/25305788/how-to-avoid-reinstalling-packages-when-building-docker-image-for-python-project)
21
  ADD ./requirements.txt /pysr/requirements.txt
22
+ RUN pip3 install --no-cache-dir -r /pysr/requirements.txt
23
 
24
  # Install PySR:
25
  # We do a minimal copy so it doesn't need to rerun at every file change:
26
+ ADD ./pyproject.toml /pysr/pyproject.toml
27
  ADD ./setup.py /pysr/setup.py
28
+ ADD ./pysr /pysr/pysr
29
+ RUN pip3 install --no-cache-dir .
30
 
31
  # Install Julia pre-requisites:
32
+ RUN python3 -c 'import pysr'
33
 
34
  # metainformation
35
  LABEL org.opencontainers.image.authors = "Miles Cranmer"
README.md CHANGED
@@ -27,10 +27,6 @@ If you've finished a project with PySR, please submit a PR to showcase your work
27
  - [Contributors](#contributors-)
28
  - [Why PySR?](#why-pysr)
29
  - [Installation](#installation)
30
- - [pip](#pip)
31
- - [conda](#conda)
32
- - [docker](#docker-build)
33
- - [Troubleshooting](#troubleshooting)
34
  - [Quickstart](#quickstart)
35
  - [→ Documentation](https://astroautomata.com/PySR)
36
 
@@ -129,48 +125,31 @@ an explicit and powerful way to interpret deep neural networks.
129
 
130
  ## Installation
131
 
132
- | [pip](#pip) | [conda](#conda) | [docker](#docker-build) |
133
- |:---:|:---:|:---:|
134
- | Everywhere (recommended) | Linux and Intel-based macOS | Everywhere (if all else fails) |
135
 
136
- ---
137
-
138
- ### pip
139
 
140
- 1. [Install Julia](https://julialang.org/downloads/)
141
- - Alternatively, my personal preference is to use [juliaup](https://github.com/JuliaLang/juliaup#installation), which performs this automatically.
142
- 2. Then, run:
143
  ```bash
144
- pip3 install -U pysr
145
  ```
146
- 3. Finally, to install Julia dependencies:
147
- ```bash
148
- python3 -m pysr install
149
- ```
150
- > (Alternatively, from within Python, you can call `import pysr; pysr.install()`)
151
 
152
- ---
153
 
154
- ### conda
155
 
156
- The PySR build in conda includes all required dependencies, so you can install it by simply running:
157
 
158
  ```bash
159
  conda install -c conda-forge pysr
160
  ```
161
 
162
- from within your target conda environment.
163
 
164
- However, note that the conda install does not support precompilation of Julia libraries, so the
165
- start time may be slightly slower as the JIT-compilation will be running.
166
- (Once the compilation finishes, there will not be a performance difference though.)
167
-
168
- ---
169
 
170
- ### docker build
171
 
172
  1. Clone this repo.
173
- 2. In the repo, run the build command with:
174
  ```bash
175
  docker build -t pysr .
176
  ```
@@ -185,11 +164,7 @@ For more details, see the [docker section](#docker).
185
 
186
  ### Troubleshooting
187
 
188
- Common issues tend to be related to Python not finding Julia.
189
- To debug this, try running `python3 -c 'import os; print(os.environ["PATH"])'`.
190
- If none of these folders contain your Julia binary, then you need to add Julia's `bin` folder to your `PATH` environment variable.
191
-
192
- Another issue you might run into can result in a hard crash at import with
193
  a message like "`GLIBCXX_...` not found". This is due to another one of the Python dependencies
194
  loading an incorrect `libstdc++` library. To fix this, you should modify your
195
  `LD_LIBRARY_PATH` variable to reference the Julia libraries. For example, if the Julia
@@ -202,7 +177,6 @@ export LD_LIBRARY_PATH=$HOME/.julia/juliaup/julia-1.10.0+0.x64.linux.gnu/lib/jul
202
 
203
  to your `.bashrc` or `.zshrc` file.
204
 
205
- **Running PySR on macOS with an M1 processor:** you should use the pip version, and make sure to get the Julia binary for ARM/M-series processors.
206
 
207
  ## Quickstart
208
 
@@ -240,7 +214,7 @@ model = PySRRegressor(
240
  ],
241
  extra_sympy_mappings={"inv": lambda x: 1 / x},
242
  # ^ Define operator for SymPy as well
243
- loss="loss(prediction, target) = (prediction - target)^2",
244
  # ^ Custom loss function (julia syntax)
245
  )
246
  ```
@@ -323,7 +297,7 @@ model = PySRRegressor(
323
  # ^ 2 populations per core, so one is always running.
324
  population_size=50,
325
  # ^ Slightly larger populations, for greater diversity.
326
- ncyclesperiteration=500,
327
  # ^ Generations between migrations.
328
  niterations=10000000, # Run forever
329
  early_stop_condition=(
 
27
  - [Contributors](#contributors-)
28
  - [Why PySR?](#why-pysr)
29
  - [Installation](#installation)
 
 
 
 
30
  - [Quickstart](#quickstart)
31
  - [→ Documentation](https://astroautomata.com/PySR)
32
 
 
125
 
126
  ## Installation
127
 
128
+ ### Pip
 
 
129
 
130
+ You can install PySR with pip:
 
 
131
 
 
 
 
132
  ```bash
133
+ pip install pysr
134
  ```
 
 
 
 
 
135
 
136
+ Julia dependencies will be installed at first import.
137
 
138
+ ### Conda
139
 
140
+ Similarly, with conda:
141
 
142
  ```bash
143
  conda install -c conda-forge pysr
144
  ```
145
 
 
146
 
147
+ ### Docker
 
 
 
 
148
 
149
+ You can also use the `Dockerfile` to install PySR in a docker container:
150
 
151
  1. Clone this repo.
152
+ 2. Within the repo's directory, build the docker container:
153
  ```bash
154
  docker build -t pysr .
155
  ```
 
164
 
165
  ### Troubleshooting
166
 
167
+ One issue you might run into can result in a hard crash at import with
 
 
 
 
168
  a message like "`GLIBCXX_...` not found". This is due to another one of the Python dependencies
169
  loading an incorrect `libstdc++` library. To fix this, you should modify your
170
  `LD_LIBRARY_PATH` variable to reference the Julia libraries. For example, if the Julia
 
177
 
178
  to your `.bashrc` or `.zshrc` file.
179
 
 
180
 
181
  ## Quickstart
182
 
 
214
  ],
215
  extra_sympy_mappings={"inv": lambda x: 1 / x},
216
  # ^ Define operator for SymPy as well
217
+ elementwise_loss="loss(prediction, target) = (prediction - target)^2",
218
  # ^ Custom loss function (julia syntax)
219
  )
220
  ```
 
297
  # ^ 2 populations per core, so one is always running.
298
  population_size=50,
299
  # ^ Slightly larger populations, for greater diversity.
300
+ ncycles_per_iteration=500,
301
  # ^ Generations between migrations.
302
  niterations=10000000, # Run forever
303
  early_stop_condition=(
TODO.md DELETED
@@ -1,142 +0,0 @@
1
- # TODO
2
-
3
- - [x] Async threading, and have a server of equations. So that threads aren't waiting for others to finish.
4
- - [x] Print out speed of equation evaluation over time. Measure time it takes per cycle
5
- - [x] Add ability to pass an operator as an anonymous function string. E.g., `binary_operators=["g(x, y) = x+y"]`.
6
- - [x] Add error bar capability (thanks Johannes Buchner for suggestion)
7
- - [x] Why don't the constants continually change? It should optimize them every time the equation appears.
8
- - Restart the optimizer to help with this.
9
- - [x] Add several common unary and binary operators; list these.
10
- - [x] Try other initial conditions for optimizer
11
- - [x] Make scaling of changes to constant a hyperparameter
12
- - [x] Make deletion op join deleted subtree to parent
13
- - [x] Update hall of fame every iteration?
14
- - Seems to overfit early if we do this.
15
- - [x] Consider adding mutation to pass an operator in through a new binary operator (e.g., exp(x3)->plus(exp(x3), ...))
16
- - (Added full insertion operator)
17
- - [x] Add a node at the top of a tree
18
- - [x] Insert a node at the top of a subtree
19
- - [x] Record very best individual in each population, and return at end.
20
- - [x] Write our own tree copy operation; deepcopy() is the slowest operation by far.
21
- - [x] Hyperparameter tune
22
- - [x] Create a benchmark for accuracy
23
- - [x] Add interface for either defining an operation to learn, or loading in arbitrary dataset.
24
- - Could just write out the dataset in julia, or load it.
25
- - [x] Create a Python interface
26
- - [x] Explicit constant optimization on hall-of-fame
27
- - Create method to find and return all constants, from left to right
28
- - Create method to find and set all constants, in same order
29
- - Pull up some optimization algorithm and add it. Keep the package small!
30
- - [x] Create a benchmark for speed
31
- - [x] Simplify subtrees with only constants beneath them. Or should I? Maybe randomly simplify sometimes?
32
- - [x] Record hall of fame
33
- - [x] Optionally (with hyperparameter) migrate the hall of fame, rather than current bests
34
- - [x] Test performance of reduced precision integers
35
- - No effect
36
- - [x] Create struct to pass through all hyperparameters, instead of treating as constants
37
- - Make sure doesn't affect performance
38
- - [x] Rename package to avoid trademark issues
39
- - PySR?
40
- - [x] Put on PyPI
41
- - [x] Treat baseline as a solution.
42
- - [x] Print score alongside MSE: \delta \log(MSE)/\delta \log(complexity)
43
- - [x] Calculating the loss function - there is duplicate calculations happening.
44
- - [x] Declaration of the weights array every iteration
45
- - [x] Sympy evaluation
46
- - [x] Threaded recursion
47
- - [x] Test suite
48
- - [x] Performance: - Use an enum for functions instead of storing them?
49
- - Gets ~40% speedup on small test.
50
- - [x] Use @fastmath
51
- - [x] Try @spawn over each sub-population. Do random sort, compute mutation for each, then replace 10% oldest.
52
- - [x] Control max depth, rather than max number of nodes?
53
- - [x] Allow user to pass names for variables - use these when printing
54
- - [x] Check for domain errors in an equation quickly before actually running the entire array over it. (We do this now recursively - every single equation is checked for nans/infs when being computed.)
55
- - [x] read the docs page
56
- - [x] Create backup csv file so always something to copy from for `PySR`. Also use random hall of fame file by default. Call function to read from csv after running, so don't need to run again. Dump scores alongside MSE to .csv (and return with Pandas).
57
- - [x] Better cleanup of zombie processes after <ctl-c>
58
- - [x] Consider printing output sorted by score, not by complexity.
59
- - [x] Increase max complexity slowly over time up to the actual max.
60
- - [x] Record density over complexity. Favor equations that have a density we have not explored yet. Want the final density to be evenly distributed.
61
- - [x] Do printing from Python side. Then we can do simplification and pretty-printing.
62
- - [x] Sympy printing
63
- - [x] Store Project.toml inside PySR's python code, rather than copied to site-packages.
64
- - [ ] Sort these todo lists by priority
65
-
66
- - [ ] Automatically convert log, log10, log2, pow to the correct operators.
67
- - [ ] I think the simplification isn't working correctly (post-merging SymbolicUtils.)
68
- - [ ] Show demo of PySRRegressor. Fit equations, then show how to view equations.
69
- - [ ] Add "selected" column string to regular equations dict.
70
- - [ ] List "Loss" instead of "MSE"
71
-
72
- ## Feature ideas
73
-
74
- - [ ] Other default losses (e.g., abs, other likelihoods, or just allow user to pass this as a string).
75
- - [ ] Other dtypes available
76
- - [ ] NSGA-II
77
- - [ ] Cross-validation
78
- - [ ] Hierarchical model, so can re-use functional forms. Output of one equation goes into second equation?
79
- - [ ] Add function to plot equations
80
- - [ ] Refresh screen rather than dumping to stdout?
81
- - [ ] Add ability to save state from python
82
- - [ ] Additional degree operators?
83
- - [ ] Multi targets (vector ops). Idea 1: Node struct contains argument for which registers it is applied to. Then, can work with multiple components simultaneously. Though this may be tricky to get right. Idea 2: each op is defined by input/output space. Some operators are flexible, and the spaces should be adjusted automatically. Otherwise, only consider ops that make a tree possible. But will need additional ops here to get it to work. Idea 3: define each equation in 2 parts: one part that is shared between all outputs, and one that is different between all outputs. Maybe this could be an array of nodes corresponding to each output. And those nodes would define their functions.
84
- - Much easier option: simply flatten the output vector, and set the index as another input feature. The equation learned will be a single equation containing indices as a feature.
85
- - [ ] Tree crossover? I.e., can take as input a part of the same equation, so long as it is the same level or below?
86
- - [ ] Create flexible way of providing "simplification recipes." I.e., plus(plus(T, C), C) => plus(T, +(C, C)). The user could pass these.
87
- - [ ] Consider allowing multi-threading turned off, for faster testing (cache issue on travis). Or could simply fix the caching issue there.
88
- - [ ] Consider returning only the equation of interest; rather than all equations.
89
- - [ ] Enable derivative operators. These would differentiate their right argument wrt their left argument, some input variable.
90
-
91
- ## Algorithmic performance ideas:
92
-
93
-
94
- - [ ] Use package compiler and compile sr.jl into a standalone binary that can be used by pysr.
95
- - [ ] When doing equation warmup, only migrate those equations with almost the same complexity. Rather than having to consider simple equations later in the game.
96
- - [ ] Right now we only update the score based on some. Need to update score based on entire data! Note that optimizer only is used sometimes.
97
- - [ ] Idea: use gradient of equation with respect to each operator (perhaps simply add to each operator) to tell which part is the most "sensitive" to changes. Then, perhaps insert/delete/mutate on that part of the tree?
98
- - [ ] Start populations staggered; so that there is more frequent printing (and pops that start a bit later get hall of fame already)?
99
- - [ ] Consider adding mutation for constant<->variable
100
- - [ ] Implement more parts of the original Eureqa algorithms: https://www.creativemachineslab.com/eureqa.html
101
- - [ ] Experiment with freezing parts of model; then we only append/delete at end of tree.
102
- - [ ] Use NN to generate weights over all probability distribution conditional on error and existing equation, and train on some randomly-generated equations
103
- - [ ] For hierarchical idea: after running some number of iterations, do a search for "most common pattern". Then, turn that subtree into its own operator.
104
- - [ ] Calculate feature importances based on features we've already seen, then weight those features up in all random generations.
105
- - [ ] Calculate feature importances of future mutations, by looking at correlation between residual of model, and the features.
106
- - Store feature importances of future, and periodically update it.
107
- - [ ] Punish depth rather than size, as depth really hurts during optimization.
108
-
109
-
110
- ## Code performance ideas:
111
-
112
- - [ ] How hard is it to turn the recursive array evaluation into a for loop?
113
- - [ ] Try defining a binary tree as an array, rather than a linked list. See https://stackoverflow.com/a/6384714/2689923
114
- - in array branch
115
- - [ ] Add true multi-node processing, with MPI, or just file sharing. Multiple populations per core.
116
- - Ongoing in cluster branch
117
- - [ ] Performance: try inlining things?
118
- - [ ] Try storing things like number nodes in a tree; then can iterate instead of counting
119
-
120
- ```julia
121
- mutable struct Tree
122
- degree::Array{Integer, 1}
123
- val::Array{Float32, 1}
124
- constant::Array{Bool, 1}
125
- op::Array{Integer, 1}
126
- Tree(s::Integer) = new(zeros(Integer, s), zeros(Float32, s), zeros(Bool, s), zeros(Integer, s))
127
- end
128
- ```
129
-
130
- - Then, we could even work with trees on the GPU, since they are all pre-allocated arrays.
131
- - A population could be a Tree, but with degree 2 on all the degrees. So a slice of population arrays forms a tree.
132
- - How many operations can we do via matrix ops? Mutate node=>easy.
133
- - Can probably batch and do many operations at once across a population.
134
- - Or, across all populations! Mutate operator: index 2D array and set it to random vector? But the indexing might hurt.
135
- - The big advantage: can evaluate all new mutated trees at once; as massive matrix operation.
136
- - Can control depth, rather than maxsize. Then just pretend all trees are full and same depth. Then we really don't need to care about depth.
137
-
138
- - [ ] Can we cache calculations, or does the compiler do that? E.g., I should only have to run exp(x0) once; after that it should be read from memory.
139
- - Done on caching branch. Currently am finding that this is quite slow (presumably because memory allocation is the main issue).
140
- - [ ] Add GPU capability?
141
- - Not sure if possible, as binary trees are the real bottleneck.
142
- - Could generate on CPU, evaluate score on GPU?
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
datasets/FeynmanEquations.csv DELETED
@@ -1,101 +0,0 @@
1
- Filename,datapoints,Number,Output,Formula,# variables,v1_name,v1_low,v1_high,v2_name,v2_low,v2_high,v3_name,v3_low,v3_high,v4_name,v4_low,v4_high,v5_name,v5_low,v5_high,v6_name,v6_low,v6_high,v7_name,v7_low,v7_high,v8_name,v8_low,v8_high,v9_name,v9_low,v9_high,v10_name,v10_low,v10_high
2
- I.6.2a,10,1,f,exp(-theta**2/2)/sqrt(2*pi),1,theta,1,3,,,,,,,,,,,,,,,,,,,,,,,,,,,
3
- I.6.2,100,2,f,exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma),2,sigma,1,3,theta,1,3,,,,,,,,,,,,,,,,,,,,,,,,
4
- I.6.2b,1000,3,f,exp(-((theta-theta1)/sigma)**2/2)/(sqrt(2*pi)*sigma),3,sigma,1,3,theta,1,3,theta1,1,3,,,,,,,,,,,,,,,,,,,,,
5
- I.8.14,100,4,d,sqrt((x2-x1)**2+(y2-y1)**2),4,x1,1,5,x2,1,5,y1,1,5,y2,1,5,,,,,,,,,,,,,,,,,,
6
- I.9.18,1000000,5,F,G*m1*m2/((x2-x1)**2+(y2-y1)**2+(z2-z1)**2),9,m1,1,2,m2,1,2,G,1,2,x1,3,4,x2,1,2,y1,3,4,y2,1,2,z1,3,4,z2,1,2,,,
7
- I.10.7,10,6,m,m_0/sqrt(1-v**2/c**2),3,m_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
8
- I.11.19,100,7,A,x1*y1+x2*y2+x3*y3,6,x1,1,5,x2,1,5,x3,1,5,y1,1,5,y2,1,5,y3,1,5,,,,,,,,,,,,
9
- I.12.1,10,8,F,mu*Nn,2,mu,1,5,Nn,1,5,,,,,,,,,,,,,,,,,,,,,,,,
10
- I.12.2,10,10,F,q1*q2*r/(4*pi*epsilon*r**3),4,q1,1,5,q2,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,
11
- I.12.4,10,11,Ef,q1*r/(4*pi*epsilon*r**3),3,q1,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
12
- I.12.5,10,12,F,q2*Ef,2,q2,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,,
13
- I.12.11,10,13,F,q*(Ef+B*v*sin(theta)),5,q,1,5,Ef,1,5,B,1,5,v,1,5,theta,1,5,,,,,,,,,,,,,,,
14
- I.13.4,10,9,K,1/2*m*(v**2+u**2+w**2),4,m,1,5,v,1,5,u,1,5,w,1,5,,,,,,,,,,,,,,,,,,
15
- I.13.12,10,14,U,G*m1*m2*(1/r2-1/r1),5,m1,1,5,m2,1,5,r1,1,5,r2,1,5,G,1,5,,,,,,,,,,,,,,,
16
- I.14.3,10,15,U,m*g*z,3,m,1,5,g,1,5,z,1,5,,,,,,,,,,,,,,,,,,,,,
17
- I.14.4,10,16,U,1/2*k_spring*x**2,2,k_spring,1,5,x,1,5,,,,,,,,,,,,,,,,,,,,,,,,
18
- I.15.3x,10,17,x1,(x-u*t)/sqrt(1-u**2/c**2),4,x,5,10,u,1,2,c,3,20,t,1,2,,,,,,,,,,,,,,,,,,
19
- I.15.3t,100,18,t1,(t-u*x/c**2)/sqrt(1-u**2/c**2),4,x,1,5,c,3,10,u,1,2,t,1,5,,,,,,,,,,,,,,,,,,
20
- I.15.1,10,19,p,m_0*v/sqrt(1-v**2/c**2),3,m_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
21
- I.16.6,10,20,v1,(u+v)/(1+u*v/c**2),3,c,1,5,v,1,5,u,1,5,,,,,,,,,,,,,,,,,,,,,
22
- I.18.4,10,21,r,(m1*r1+m2*r2)/(m1+m2),4,m1,1,5,m2,1,5,r1,1,5,r2,1,5,,,,,,,,,,,,,,,,,,
23
- I.18.12,10,22,tau,r*F*sin(theta),3,r,1,5,F,1,5,theta,0,5,,,,,,,,,,,,,,,,,,,,,
24
- I.18.14,10,23,L,m*r*v*sin(theta),4,m,1,5,r,1,5,v,1,5,theta,1,5,,,,,,,,,,,,,,,,,,
25
- I.24.6,10,24,E_n,1/2*m*(omega**2+omega_0**2)*1/2*x**2,4,m,1,3,omega,1,3,omega_0,1,3,x,1,3,,,,,,,,,,,,,,,,,,
26
- I.25.13,10,25,Volt,q/C,2,q,1,5,C,1,5,,,,,,,,,,,,,,,,,,,,,,,,
27
- I.26.2,100,26,theta1,arcsin(n*sin(theta2)),2,n,0,1,theta2,1,5,,,,,,,,,,,,,,,,,,,,,,,,
28
- I.27.6,10,27,foc,1/(1/d1+n/d2),3,d1,1,5,d2,1,5,n,1,5,,,,,,,,,,,,,,,,,,,,,
29
- I.29.4,10,28,k,omega/c,2,omega,1,10,c,1,10,,,,,,,,,,,,,,,,,,,,,,,,
30
- I.29.16,1000,29,x,sqrt(x1**2+x2**2-2*x1*x2*cos(theta1-theta2)),4,x1,1,5,x2,1,5,theta1,1,5,theta2,1,5,,,,,,,,,,,,,,,,,,
31
- I.30.3,100,30,Int,Int_0*sin(n*theta/2)**2/sin(theta/2)**2,3,Int_0,1,5,theta,1,5,n,1,5,,,,,,,,,,,,,,,,,,,,,
32
- I.30.5,100,31,theta,arcsin(lambd/(n*d)),3,lambd,1,2,d,2,5,n,1,5,,,,,,,,,,,,,,,,,,,,,
33
- I.32.5,10,32,Pwr,q**2*a**2/(6*pi*epsilon*c**3),4,q,1,5,a,1,5,epsilon,1,5,c,1,5,,,,,,,,,,,,,,,,,,
34
- I.32.17,10,33,Pwr,(1/2*epsilon*c*Ef**2)*(8*pi*r**2/3)*(omega**4/(omega**2-omega_0**2)**2),6,epsilon,1,2,c,1,2,Ef,1,2,r,1,2,omega,1,2,omega_0,3,5,,,,,,,,,,,,
35
- I.34.8,10,34,omega,q*v*B/p,4,q,1,5,v,1,5,B,1,5,p,1,5,,,,,,,,,,,,,,,,,,
36
- I.34.1,10,35,omega,omega_0/(1-v/c),3,c,3,10,v,1,2,omega_0,1,5,,,,,,,,,,,,,,,,,,,,,
37
- I.34.14,10,36,omega,(1+v/c)/sqrt(1-v**2/c**2)*omega_0,3,c,3,10,v,1,2,omega_0,1,5,,,,,,,,,,,,,,,,,,,,,
38
- I.34.27,10,37,E_n,(h/(2*pi))*omega,2,omega,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,,,,
39
- I.37.4,100,38,Int,I1+I2+2*sqrt(I1*I2)*cos(delta),3,I1,1,5,I2,1,5,delta,1,5,,,,,,,,,,,,,,,,,,,,,
40
- I.38.12,10,39,r,4*pi*epsilon*(h/(2*pi))**2/(m*q**2),4,m,1,5,q,1,5,h,1,5,epsilon,1,5,,,,,,,,,,,,,,,,,,
41
- I.39.1,10,40,E_n,3/2*pr*V,2,pr,1,5,V,1,5,,,,,,,,,,,,,,,,,,,,,,,,
42
- I.39.11,10,41,E_n,1/(gamma-1)*pr*V,3,gamma,2,5,pr,1,5,V,1,5,,,,,,,,,,,,,,,,,,,,,
43
- I.39.22,10,42,pr,n*kb*T/V,4,n,1,5,T,1,5,V,1,5,kb,1,5,,,,,,,,,,,,,,,,,,
44
- I.40.1,10,43,n,n_0*exp(-m*g*x/(kb*T)),6,n_0,1,5,m,1,5,x,1,5,T,1,5,g,1,5,kb,1,5,,,,,,,,,,,,
45
- I.41.16,10,44,L_rad,h/(2*pi)*omega**3/(pi**2*c**2*(exp((h/(2*pi))*omega/(kb*T))-1)),5,omega,1,5,T,1,5,h,1,5,kb,1,5,c,1,5,,,,,,,,,,,,,,,
46
- I.43.16,10,45,v,mu_drift*q*Volt/d,4,mu_drift,1,5,q,1,5,Volt,1,5,d,1,5,,,,,,,,,,,,,,,,,,
47
- I.43.31,10,46,D,mob*kb*T,3,mob,1,5,T,1,5,kb,1,5,,,,,,,,,,,,,,,,,,,,,
48
- I.43.43,10,47,kappa,1/(gamma-1)*kb*v/A,4,gamma,2,5,kb,1,5,A,1,5,v,1,5,,,,,,,,,,,,,,,,,,
49
- I.44.4,10,48,E_n,n*kb*T*ln(V2/V1),5,n,1,5,kb,1,5,T,1,5,V1,1,5,V2,1,5,,,,,,,,,,,,,,,
50
- I.47.23,10,49,c,sqrt(gamma*pr/rho),3,gamma,1,5,pr,1,5,rho,1,5,,,,,,,,,,,,,,,,,,,,,
51
- I.48.2,100,50,E_n,m*c**2/sqrt(1-v**2/c**2),3,m,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
52
- I.50.26,10,51,x,x1*(cos(omega*t)+alpha*cos(omega*t)**2),4,x1,1,3,omega,1,3,t,1,3,alpha,1,3,,,,,,,,,,,,,,,,,,
53
- II.2.42,10,52,Pwr,kappa*(T2-T1)*A/d,5,kappa,1,5,T1,1,5,T2,1,5,A,1,5,d,1,5,,,,,,,,,,,,,,,
54
- II.3.24,10,53,flux,Pwr/(4*pi*r**2),2,Pwr,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,,,,
55
- II.4.23,10,54,Volt,q/(4*pi*epsilon*r),3,q,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
56
- II.6.11,10,55,Volt,1/(4*pi*epsilon)*p_d*cos(theta)/r**2,4,epsilon,1,3,p_d,1,3,theta,1,3,r,1,3,,,,,,,,,,,,,,,,,,
57
- II.6.15a,1000,56,Ef,p_d/(4*pi*epsilon)*3*z/r**5*sqrt(x**2+y**2),6,epsilon,1,3,p_d,1,3,r,1,3,x,1,3,y,1,3,z,1,3,,,,,,,,,,,,
58
- II.6.15b,10,57,Ef,p_d/(4*pi*epsilon)*3*cos(theta)*sin(theta)/r**3,4,epsilon,1,3,p_d,1,3,theta,1,3,r,1,3,,,,,,,,,,,,,,,,,,
59
- II.8.7,10,58,E_n,3/5*q**2/(4*pi*epsilon*d),3,q,1,5,epsilon,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
60
- II.8.31,10,59,E_den,epsilon*Ef**2/2,2,epsilon,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,,
61
- II.10.9,10,60,Ef,sigma_den/epsilon*1/(1+chi),3,sigma_den,1,5,epsilon,1,5,chi,1,5,,,,,,,,,,,,,,,,,,,,,
62
- II.11.3,10,61,x,q*Ef/(m*(omega_0**2-omega**2)),5,q,1,3,Ef,1,3,m,1,3,omega_0,3,5,omega,1,2,,,,,,,,,,,,,,,
63
- II.11.17,10,62,n,n_0*(1+p_d*Ef*cos(theta)/(kb*T)),6,n_0,1,3,kb,1,3,T,1,3,theta,1,3,p_d,1,3,Ef,1,3,,,,,,,,,,,,
64
- II.11.20,10,63,Pol,n_rho*p_d**2*Ef/(3*kb*T),5,n_rho,1,5,p_d,1,5,Ef,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,
65
- II.11.27,100,64,Pol,n*alpha/(1-(n*alpha/3))*epsilon*Ef,4,n,0,1,alpha,0,1,epsilon,1,2,Ef,1,2,,,,,,,,,,,,,,,,,,
66
- II.11.28,100,65,theta,1+n*alpha/(1-(n*alpha/3)),2,n,0,1,alpha,0,1,,,,,,,,,,,,,,,,,,,,,,,,
67
- II.13.17,10,66,B,1/(4*pi*epsilon*c**2)*2*I/r,4,epsilon,1,5,c,1,5,I,1,5,r,1,5,,,,,,,,,,,,,,,,,,
68
- II.13.23,100,67,rho_c,rho_c_0/sqrt(1-v**2/c**2),3,rho_c_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
69
- II.13.34,10,68,j,rho_c_0*v/sqrt(1-v**2/c**2),3,rho_c_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
70
- II.15.4,10,69,E_n,-mom*B*cos(theta),3,mom,1,5,B,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,,
71
- II.15.5,10,70,E_n,-p_d*Ef*cos(theta),3,p_d,1,5,Ef,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,,
72
- II.21.32,10,71,Volt,q/(4*pi*epsilon*r*(1-v/c)),5,q,1,5,epsilon,1,5,r,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,
73
- II.24.17,10,72,k,sqrt(omega**2/c**2-pi**2/d**2),3,omega,4,6,c,1,2,d,2,4,,,,,,,,,,,,,,,,,,,,,
74
- II.27.16,10,73,flux,epsilon*c*Ef**2,3,epsilon,1,5,c,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,
75
- II.27.18,10,74,E_den,epsilon*Ef**2,2,epsilon,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,,
76
- II.34.2a,10,75,I,q*v/(2*pi*r),3,q,1,5,v,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
77
- II.34.2,10,76,mom,q*v*r/2,3,q,1,5,v,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
78
- II.34.11,10,77,omega,g_*q*B/(2*m),4,g_,1,5,q,1,5,B,1,5,m,1,5,,,,,,,,,,,,,,,,,,
79
- II.34.29a,10,78,mom,q*h/(4*pi*m),3,q,1,5,h,1,5,m,1,5,,,,,,,,,,,,,,,,,,,,,
80
- II.34.29b,10,79,E_n,g_*mom*B*Jz/(h/(2*pi)),5,g_,1,5,h,1,5,Jz,1,5,mom,1,5,B,1,5,,,,,,,,,,,,,,,
81
- II.35.18,10,80,n,n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T))),5,n_0,1,3,kb,1,3,T,1,3,mom,1,3,B,1,3,,,,,,,,,,,,,,,
82
- II.35.21,10,81,M,n_rho*mom*tanh(mom*B/(kb*T)),5,n_rho,1,5,mom,1,5,B,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,
83
- II.36.38,10,82,f,mom*H/(kb*T)+(mom*alpha)/(epsilon*c**2*kb*T)*M,8,mom,1,3,H,1,3,kb,1,3,T,1,3,alpha,1,3,epsilon,1,3,c,1,3,M,1,3,,,,,,
84
- II.37.1,10,83,E_n,mom*(1+chi)*B,3,mom,1,5,B,1,5,chi,1,5,,,,,,,,,,,,,,,,,,,,,
85
- II.38.3,10,84,F,Y*A*x/d,4,Y,1,5,A,1,5,d,1,5,x,1,5,,,,,,,,,,,,,,,,,,
86
- II.38.14,10,85,mu_S,Y/(2*(1+sigma)),2,Y,1,5,sigma,1,5,,,,,,,,,,,,,,,,,,,,,,,,
87
- III.4.32,10,86,n,1/(exp((h/(2*pi))*omega/(kb*T))-1),4,h,1,5,omega,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,,,,
88
- III.4.33,10,87,E_n,(h/(2*pi))*omega/(exp((h/(2*pi))*omega/(kb*T))-1),4,h,1,5,omega,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,,,,
89
- III.7.38,10,88,omega,2*mom*B/(h/(2*pi)),3,mom,1,5,B,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,
90
- III.8.54,10,89,prob,sin(E_n*t/(h/(2*pi)))**2,3,E_n,1,2,t,1,2,h,1,4,,,,,,,,,,,,,,,,,,,,,
91
- III.9.52,1000,90,prob,(p_d*Ef*t/(h/(2*pi)))*sin((omega-omega_0)*t/2)**2/((omega-omega_0)*t/2)**2,6,p_d,1,3,Ef,1,3,t,1,3,h,1,3,omega,1,5,omega_0,1,5,,,,,,,,,,,,
92
- III.10.19,100,91,E_n,mom*sqrt(Bx**2+By**2+Bz**2),4,mom,1,5,Bx,1,5,By,1,5,Bz,1,5,,,,,,,,,,,,,,,,,,
93
- III.12.43,10,92,L,n*(h/(2*pi)),2,n,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,,,,
94
- III.13.18,10,93,v,2*E_n*d**2*k/(h/(2*pi)),4,E_n,1,5,d,1,5,k,1,5,h,1,5,,,,,,,,,,,,,,,,,,
95
- III.14.14,10,94,I,I_0*(exp(q*Volt/(kb*T))-1),5,I_0,1,5,q,1,2,Volt,1,2,kb,1,2,T,1,2,,,,,,,,,,,,,,,
96
- III.15.12,10,95,E_n,2*U*(1-cos(k*d)),3,U,1,5,k,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
97
- III.15.14,10,96,m,(h/(2*pi))**2/(2*E_n*d**2),3,h,1,5,E_n,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
98
- III.15.27,10,97,k,2*pi*alpha/(n*d),3,alpha,1,5,n,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
99
- III.17.37,10,98,f,beta*(1+alpha*cos(theta)),3,beta,1,5,alpha,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,,
100
- III.19.51,10,99,E_n,-m*q**4/(2*(4*pi*epsilon)**2*(h/(2*pi))**2)*(1/n**2),5,m,1,5,q,1,5,h,1,5,n,1,5,epsilon,1,5,,,,,,,,,,,,,,,
101
- III.21.20,10,100,j,-rho_c_0*q*A_vec/m,4,rho_c_0,1,5,q,1,5,A_vec,1,5,m,1,5,,,,,,,,,,,,,,,,,,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
docs/backend.md CHANGED
@@ -2,27 +2,73 @@
2
 
3
  If you have explored the [options](options.md) and [PySRRegressor reference](api.md), and still haven't figured out how to specify a constraint or objective required for your problem, you might consider editing the backend.
4
  The backend of PySR is written as a pure Julia package under the name [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl).
5
- This package is accessed with [`PyJulia`](https://github.com/JuliaPy/pyjulia), which allows us to transfer objects back and forth between the Python and Julia runtimes.
6
 
7
  PySR gives you access to everything in SymbolicRegression.jl, but there are some specific use-cases which require modifications to the backend itself.
8
  Generally you can do this as follows:
9
 
10
- 1. Clone a copy of the backend:
11
- ```
 
 
 
12
  git clone https://github.com/MilesCranmer/SymbolicRegression.jl
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  ```
14
- 2. Edit the source code in `src/` to your requirements:
15
- - The documentation for the backend is given [here](https://astroautomata.com/SymbolicRegression.jl/dev/).
16
- - Throughout the package, you will often see template functions which typically use a symbol `T` (such as in the string `where {T<:Real}`). Here, `T` is simply the datatype of the input data and stored constants, such as `Float32` or `Float64`. Writing functions in this way lets us write functions generic to types, while still having access to the specific type specified at compilation time.
17
- - Expressions are stored as binary trees, using the `Node{T}` type, described [here](https://astroautomata.com/SymbolicRegression.jl/dev/types/#SymbolicRegression.CoreModule.EquationModule.Node).
18
- - Parts of the code which are typically edited by users include:
19
- - [`src/LossFunctions.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/LossFunctions.jl), particularly the function `eval_loss`. This function assigns a loss to a given expression, using `eval_tree_array` to evaluate it, and `loss` to compute the loss with respect to the dataset.
20
- - [`src/CheckConstraints.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/CheckConstraints.jl), particularly the function `check_constraints`. This function checks whether a given expression satisfies constraints, such as having a complexity lower than `maxsize`, and whether it contains any forbidden nestings of functions.
21
- - Note that all expressions, *even intermediate expressions*, must comply with constraints. Therefore, make sure that evolution can still reach your desired expression (with one mutation at a time), before setting a hard constraint. In other cases you might want to instead put in the loss function.
22
- - [`src/Options.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/Options.jl), as well as the struct definition in [`src/OptionsStruct.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl). This file specifies all the options used in the search: an instance of `Options` is typically available throughout every function in `SymbolicRegression.jl`. If you add new functionality to the backend, and wish to make it parameterizable (including from PySR), you should specify it in the options.
23
- - For reference, the main loop itself is found in the `equation_search` function inside [`src/SymbolicRegression.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/SymbolicRegression.jl).
24
- 3. Specify the directory of `SymbolicRegression.jl` to PySR by setting `julia_project` in the `PySRRegressor` object, and run `.fit` when you're ready. That's it! No compilation or build steps required.
25
- - Note that it will automatically update your project by default; to turn this off, set `update=False`.
26
 
27
  If you get comfortable enough with the backend, you might consider using the Julia package directly: the API is given on the [SymbolicRegression.jl documentation](https://astroautomata.com/SymbolicRegression.jl/dev/).
28
 
 
2
 
3
  If you have explored the [options](options.md) and [PySRRegressor reference](api.md), and still haven't figured out how to specify a constraint or objective required for your problem, you might consider editing the backend.
4
  The backend of PySR is written as a pure Julia package under the name [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl).
5
+ This package is accessed with [`juliacall`](https://github.com/JuliaPy/PythonCall.jl), which allows us to transfer objects back and forth between the Python and Julia runtimes.
6
 
7
  PySR gives you access to everything in SymbolicRegression.jl, but there are some specific use-cases which require modifications to the backend itself.
8
  Generally you can do this as follows:
9
 
10
+ ## 1. Check out the source code
11
+
12
+ Clone a copy of the backend as well as PySR:
13
+
14
+ ```bash
15
  git clone https://github.com/MilesCranmer/SymbolicRegression.jl
16
+ git clone https://github.com/MilesCranmer/PySR
17
+ ```
18
+
19
+ You may wish to check out the specific versions, which you can do with:
20
+
21
+ ```bash
22
+ cd PySR
23
+ git checkout <version>
24
+
25
+ # You can see the current backend version in `pysr/juliapkg.json`
26
+ cd ../SymbolicRegression.jl
27
+ git checkout <backend_version>
28
+ ```
29
+
30
+ ## 2. Edit the source to your requirements
31
+
32
+ The main search code can be found in `src/SymbolicRegression.jl`.
33
+
34
+ Here are some tips:
35
+
36
+ - The documentation for the backend is given [here](https://astroautomata.com/SymbolicRegression.jl/dev/).
37
+ - Throughout the package, you will often see template functions which typically use a symbol `T` (such as in the string `where {T<:Real}`). Here, `T` is simply the datatype of the input data and stored constants, such as `Float32` or `Float64`. Writing functions in this way lets us write functions generic to types, while still having access to the specific type specified at compilation time.
38
+ - Expressions are stored as binary trees, using the `Node{T}` type, described [here](https://astroautomata.com/SymbolicRegression.jl/dev/types/#SymbolicRegression.CoreModule.EquationModule.Node).
39
+ - For reference, the main loop itself is found in the `equation_search` function inside [`src/SymbolicRegression.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/SymbolicRegression.jl).
40
+ - Parts of the code which are typically edited by users include:
41
+ - [`src/CheckConstraints.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/CheckConstraints.jl), particularly the function `check_constraints`. This function checks whether a given expression satisfies constraints, such as having a complexity lower than `maxsize`, and whether it contains any forbidden nestings of functions.
42
+ - Note that all expressions, *even intermediate expressions*, must comply with constraints. Therefore, make sure that evolution can still reach your desired expression (with one mutation at a time), before setting a hard constraint. In other cases you might want to instead put in the loss function.
43
+ - [`src/Options.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/Options.jl), as well as the struct definition in [`src/OptionsStruct.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl). This file specifies all the options used in the search: an instance of `Options` is typically available throughout every function in `SymbolicRegression.jl`. If you add new functionality to the backend, and wish to make it parameterizable (including from PySR), you should specify it in the options.
44
+
45
+ ## 3. Let PySR use the modified backend
46
+
47
+ Once you have made your changes, you should edit the `pysr/juliapkg.json` file
48
+ in the PySR repository to point to this local copy.
49
+ Do this by removing the `"version"` key and adding `"dev"` and `"path"` keys:
50
+
51
+ ```json
52
+ ...
53
+ "packages": {
54
+ "SymbolicRegression": {
55
+ "uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
56
+ "dev": true,
57
+ "path": "/path/to/SymbolicRegression.jl"
58
+ },
59
+ ...
60
  ```
61
+
62
+ You can then install PySR with this modified backend by running:
63
+
64
+ ```bash
65
+ cd PySR
66
+ pip install .
67
+ ```
68
+
69
+ For more information on `juliapkg.json`, see [`pyjuliapkg`](https://github.com/JuliaPy/pyjuliapkg).
70
+
71
+ ## Additional notes
 
72
 
73
  If you get comfortable enough with the backend, you might consider using the Julia package directly: the API is given on the [SymbolicRegression.jl documentation](https://astroautomata.com/SymbolicRegression.jl/dev/).
74
 
docs/examples.md CHANGED
@@ -144,7 +144,7 @@ but there are still some additional steps you can take to reduce the effect of n
144
 
145
  One thing you could do, which we won't detail here, is to create a custom log-likelihood
146
  given some assumed noise model. By passing weights to the fit function, and
147
- defining a custom loss function such as `loss="myloss(x, y, w) = w * (x - y)^2"`,
148
  you can define any sort of log-likelihood you wish. (However, note that it must be bounded at zero)
149
 
150
  However, the simplest thing to do is preprocessing, just like for feature selection. To do this,
@@ -189,12 +189,10 @@ where $p_i$ is the $i$th prime number, and $x$ is the input feature.
189
  Let's see if we can discover this using
190
  the [Primes.jl](https://github.com/JuliaMath/Primes.jl) package.
191
 
192
- First, let's manually initialize the Julia backend
193
- (here, with 8 threads and `-O3`):
194
 
195
  ```python
196
- import pysr
197
- jl = pysr.julia_helpers.init_julia(julia_kwargs={"threads": 8, "optimize": 3})
198
  ```
199
 
200
  `jl` stores the Julia runtime.
@@ -203,7 +201,7 @@ Now, let's run some Julia code to add the Primes.jl
203
  package to the PySR environment:
204
 
205
  ```python
206
- jl.eval("""
207
  import Pkg
208
  Pkg.add("Primes")
209
  """)
@@ -213,13 +211,13 @@ This imports the Julia package manager, and uses it to install
213
  `Primes.jl`. Now let's import `Primes.jl`:
214
 
215
  ```python
216
- jl.eval("import Primes")
217
  ```
218
 
219
  Now, we define a custom operator:
220
 
221
  ```python
222
- jl.eval("""
223
  function p(i::T) where T
224
  if (0.5 < i < 1000)
225
  return T(Primes.prime(round(Int, i)))
@@ -237,7 +235,7 @@ If in-bounds, it rounds it to the nearest integer, compures the corresponding pr
237
  converts it to the same type as input.
238
 
239
  Next, let's generate a list of primes for our test dataset.
240
- Since we are using PyJulia, we can just call `p` directly to do this:
241
 
242
  ```python
243
  primes = {i: jl.p(i*1.0) for i in range(1, 999)}
@@ -382,7 +380,7 @@ end
382
  model = PySRRegressor(
383
  niterations=100,
384
  binary_operators=["*", "+", "-"],
385
- full_objective=objective,
386
  )
387
  ```
388
 
@@ -464,7 +462,7 @@ let's also create a custom loss function
464
  that looks at the error in log-space:
465
 
466
  ```python
467
- loss = """function loss_fnc(prediction, target)
468
  scatter_loss = abs(log((abs(prediction)+1e-20) / (abs(target)+1e-20)))
469
  sign_loss = 10 * (sign(prediction) - sign(target))^2
470
  return scatter_loss + sign_loss
@@ -478,7 +476,7 @@ Now let's define our model:
478
  model = PySRRegressor(
479
  binary_operators=["+", "-", "*", "/"],
480
  unary_operators=["square"],
481
- loss=loss,
482
  complexity_of_constants=2,
483
  maxsize=25,
484
  niterations=100,
 
144
 
145
  One thing you could do, which we won't detail here, is to create a custom log-likelihood
146
  given some assumed noise model. By passing weights to the fit function, and
147
+ defining a custom loss function such as `elementwise_loss="myloss(x, y, w) = w * (x - y)^2"`,
148
  you can define any sort of log-likelihood you wish. (However, note that it must be bounded at zero)
149
 
150
  However, the simplest thing to do is preprocessing, just like for feature selection. To do this,
 
189
  Let's see if we can discover this using
190
  the [Primes.jl](https://github.com/JuliaMath/Primes.jl) package.
191
 
192
+ First, let's get the Julia backend:
 
193
 
194
  ```python
195
+ from pysr import jl
 
196
  ```
197
 
198
  `jl` stores the Julia runtime.
 
201
  package to the PySR environment:
202
 
203
  ```python
204
+ jl.seval("""
205
  import Pkg
206
  Pkg.add("Primes")
207
  """)
 
211
  `Primes.jl`. Now let's import `Primes.jl`:
212
 
213
  ```python
214
+ jl.seval("import Primes")
215
  ```
216
 
217
  Now, we define a custom operator:
218
 
219
  ```python
220
+ jl.seval("""
221
  function p(i::T) where T
222
  if (0.5 < i < 1000)
223
  return T(Primes.prime(round(Int, i)))
 
235
  converts it to the same type as input.
236
 
237
  Next, let's generate a list of primes for our test dataset.
238
+ Since we are using juliacall, we can just call `p` directly to do this:
239
 
240
  ```python
241
  primes = {i: jl.p(i*1.0) for i in range(1, 999)}
 
380
  model = PySRRegressor(
381
  niterations=100,
382
  binary_operators=["*", "+", "-"],
383
+ loss_function=objective,
384
  )
385
  ```
386
 
 
462
  that looks at the error in log-space:
463
 
464
  ```python
465
+ elementwise_loss = """function loss_fnc(prediction, target)
466
  scatter_loss = abs(log((abs(prediction)+1e-20) / (abs(target)+1e-20)))
467
  sign_loss = 10 * (sign(prediction) - sign(target))^2
468
  return scatter_loss + sign_loss
 
476
  model = PySRRegressor(
477
  binary_operators=["+", "-", "*", "/"],
478
  unary_operators=["square"],
479
+ elementwise_loss=elementwise_loss,
480
  complexity_of_constants=2,
481
  maxsize=25,
482
  niterations=100,
docs/options.md CHANGED
@@ -78,11 +78,11 @@ with the equations.
78
  Each cycle considers every 10-equation subsample (re-sampled for each individual 10,
79
  unless `fast_cycle` is set in which case the subsamples are separate groups of equations)
80
  a single time, producing one mutated equation for each.
81
- The parameter `ncyclesperiteration` defines how many times this
82
  occurs before the equations are compared to the hall of fame,
83
  and new equations are migrated from the hall of fame, or from other populations.
84
  It also controls how slowly annealing occurs. You may find that increasing
85
- `ncyclesperiteration` results in a higher cycles-per-second, as the head
86
  worker needs to reduce and distribute new equations less often, and also increases
87
  diversity. But at the same
88
  time, a smaller number it might be that migrating equations from the hall of fame helps
@@ -243,7 +243,7 @@ train the parameters within JAX (and is differentiable).
243
 
244
  The default loss is mean-square error, and weighted mean-square error.
245
  One can pass an arbitrary Julia string to define a custom loss, using,
246
- e.g., `loss="myloss(x, y) = abs(x - y)^1.5"`. For more details,
247
  see the
248
  [Losses](https://milescranmer.github.io/SymbolicRegression.jl/dev/losses/)
249
  page for SymbolicRegression.jl.
@@ -253,26 +253,26 @@ Here are some additional examples:
253
  abs(x-y) loss
254
 
255
  ```python
256
- PySRRegressor(..., loss="f(x, y) = abs(x - y)^1.5")
257
  ```
258
 
259
  Note that the function name doesn't matter:
260
 
261
  ```python
262
- PySRRegressor(..., loss="loss(x, y) = abs(x * y)")
263
  ```
264
 
265
  With weights:
266
 
267
  ```python
268
- model = PySRRegressor(..., loss="myloss(x, y, w) = w * abs(x - y)")
269
  model.fit(..., weights=weights)
270
  ```
271
 
272
  Weights can be used in arbitrary ways:
273
 
274
  ```python
275
- model = PySRRegressor(..., weights=weights, loss="myloss(x, y, w) = abs(x - y)^2/w^2")
276
  model.fit(..., weights=weights)
277
  ```
278
 
@@ -280,13 +280,13 @@ Built-in loss (faster) (see [losses](https://astroautomata.com/SymbolicRegressio
280
  This one computes the L3 norm:
281
 
282
  ```python
283
- PySRRegressor(..., loss="LPDistLoss{3}()")
284
  ```
285
 
286
  You can also use these losses with weights (as a weighted average):
287
 
288
  ```python
289
- model = PySRRegressor(..., weights=weights, loss="LPDistLoss{3}()")
290
  model.fit(..., weights=weights)
291
  ```
292
 
 
78
  Each cycle considers every 10-equation subsample (re-sampled for each individual 10,
79
  unless `fast_cycle` is set in which case the subsamples are separate groups of equations)
80
  a single time, producing one mutated equation for each.
81
+ The parameter `ncycles_per_iteration` defines how many times this
82
  occurs before the equations are compared to the hall of fame,
83
  and new equations are migrated from the hall of fame, or from other populations.
84
  It also controls how slowly annealing occurs. You may find that increasing
85
+ `ncycles_per_iteration` results in a higher cycles-per-second, as the head
86
  worker needs to reduce and distribute new equations less often, and also increases
87
  diversity. But at the same
88
  time, a smaller number it might be that migrating equations from the hall of fame helps
 
243
 
244
  The default loss is mean-square error, and weighted mean-square error.
245
  One can pass an arbitrary Julia string to define a custom loss, using,
246
+ e.g., `elementwise_loss="myloss(x, y) = abs(x - y)^1.5"`. For more details,
247
  see the
248
  [Losses](https://milescranmer.github.io/SymbolicRegression.jl/dev/losses/)
249
  page for SymbolicRegression.jl.
 
253
  abs(x-y) loss
254
 
255
  ```python
256
+ PySRRegressor(..., elementwise_loss="f(x, y) = abs(x - y)^1.5")
257
  ```
258
 
259
  Note that the function name doesn't matter:
260
 
261
  ```python
262
+ PySRRegressor(..., elementwise_loss="loss(x, y) = abs(x * y)")
263
  ```
264
 
265
  With weights:
266
 
267
  ```python
268
+ model = PySRRegressor(..., elementwise_loss="myloss(x, y, w) = w * abs(x - y)")
269
  model.fit(..., weights=weights)
270
  ```
271
 
272
  Weights can be used in arbitrary ways:
273
 
274
  ```python
275
+ model = PySRRegressor(..., weights=weights, elementwise_loss="myloss(x, y, w) = abs(x - y)^2/w^2")
276
  model.fit(..., weights=weights)
277
  ```
278
 
 
280
  This one computes the L3 norm:
281
 
282
  ```python
283
+ PySRRegressor(..., elementwise_loss="LPDistLoss{3}()")
284
  ```
285
 
286
  You can also use these losses with weights (as a weighted average):
287
 
288
  ```python
289
+ model = PySRRegressor(..., weights=weights, elementwise_loss="LPDistLoss{3}()")
290
  model.fit(..., weights=weights)
291
  ```
292
 
docs/tuning.md CHANGED
@@ -14,12 +14,12 @@ I run from IPython (Jupyter Notebooks don't work as well[^1]) on the head node o
14
  2. Use only the operators I think it needs and no more.
15
  3. Increase `populations` to `3*num_cores`.
16
  4. If my dataset is more than 1000 points, I either subsample it (low-dimensional and not much noise) or set `batching=True` (high-dimensional or very noisy, so it needs to evaluate on all the data).
17
- 5. While on a laptop or single node machine, you might leave the default `ncyclesperiteration`, on a cluster with ~100 cores I like to set `ncyclesperiteration` to maybe `5000` or so, until the head node occupation is under `10%`. (A larger value means the workers talk less frequently to eachother, which is useful when you have many workers!)
18
  6. Set `constraints` and `nested_constraints` as strict as possible. These can help quite a bit with exploration. Typically, if I am using `pow`, I would set `constraints={"pow": (9, 1)}`, so that power laws can only have a variable or constant as their exponent. If I am using `sin` and `cos`, I also like to set `nested_constraints={"sin": {"sin": 0, "cos": 0}, "cos": {"sin": 0, "cos": 0}}`, so that sin and cos can't be nested, which seems to happen frequently. (Although in practice I would just use `sin`, since the search could always add a phase offset!)
19
  7. Set `maxsize` a bit larger than the final size you want. e.g., if you want a final equation of size `30`, you might set this to `35`, so that it has a bit of room to explore.
20
  8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
21
  9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
22
- 10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncyclesperiteration` is large, so that optimization happens more frequently.
23
  11. Set `turbo` to `True`. This may or may not work; if there's an error, just turn it off (some operators are not SIMD-capable). If it does work, it should give you a nice 20% speedup.
24
  12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
25
 
 
14
  2. Use only the operators I think it needs and no more.
15
  3. Increase `populations` to `3*num_cores`.
16
  4. If my dataset is more than 1000 points, I either subsample it (low-dimensional and not much noise) or set `batching=True` (high-dimensional or very noisy, so it needs to evaluate on all the data).
17
+ 5. While on a laptop or single node machine, you might leave the default `ncycles_per_iteration`; on a cluster with ~100 cores I like to set `ncycles_per_iteration` to maybe `5000` or so, until the head node occupation is under `10%`. (A larger value means the workers talk less frequently to each other, which is useful when you have many workers!)
18
  6. Set `constraints` and `nested_constraints` as strict as possible. These can help quite a bit with exploration. Typically, if I am using `pow`, I would set `constraints={"pow": (9, 1)}`, so that power laws can only have a variable or constant as their exponent. If I am using `sin` and `cos`, I also like to set `nested_constraints={"sin": {"sin": 0, "cos": 0}, "cos": {"sin": 0, "cos": 0}}`, so that sin and cos can't be nested, which seems to happen frequently. (Although in practice I would just use `sin`, since the search could always add a phase offset!)
19
  7. Set `maxsize` a bit larger than the final size you want. e.g., if you want a final equation of size `30`, you might set this to `35`, so that it has a bit of room to explore.
20
  8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
21
  9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
22
+ 10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
23
  11. Set `turbo` to `True`. This may or may not work; if there's an error, just turn it off (some operators are not SIMD-capable). If it does work, it should give you a nice 20% speedup.
24
  12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
25
 
environment.yml CHANGED
@@ -2,12 +2,11 @@ name: test
2
  channels:
3
  - conda-forge
4
  dependencies:
5
- - sympy
6
- - pandas
7
- - numpy
8
- - scikit-learn
9
- - setuptools
10
- - pyjulia
11
- - openlibm
12
- - openspecfun
13
- - click
 
2
  channels:
3
  - conda-forge
4
  dependencies:
5
+ - python>=3.7
6
+ - sympy>=1.0.0,<2.0.0
7
+ - pandas>=0.21.0,<3.0.0
8
+ - numpy>=1.13.0,<2.0.0
9
+ - scikit-learn>=1.0.0,<2.0.0
10
+ - pyjuliacall>=0.9.15,<0.10.0
11
+ - click>=7.0.0,<9.0.0
12
+ - typing_extensions>=4.0.0,<5.0.0
 
example.py CHANGED
@@ -18,7 +18,7 @@ model = PySRRegressor(
18
  ],
19
  extra_sympy_mappings={"inv": lambda x: 1 / x},
20
  # ^ Define operator for SymPy as well
21
- loss="loss(x, y) = (x - y)^2",
22
  # ^ Custom loss function (julia syntax)
23
  )
24
 
 
18
  ],
19
  extra_sympy_mappings={"inv": lambda x: 1 / x},
20
  # ^ Define operator for SymPy as well
21
+ elementwise_loss="loss(x, y) = (x - y)^2",
22
  # ^ Custom loss function (julia syntax)
23
  )
24
 
examples/pysr_demo.ipynb CHANGED
@@ -15,68 +15,9 @@
15
  "id": "tQ1r1bbb0yBv"
16
  },
17
  "source": [
18
- "\n",
19
  "## Instructions\n",
20
  "1. Work on a copy of this notebook: _File_ > _Save a copy in Drive_ (you will need a Google account).\n",
21
- "2. (Optional) If you would like to do the deep learning component of this tutorial, turn on the GPU with Edit->Notebook settings->Hardware accelerator->GPU\n",
22
- "3. Execute the following cell (click on it and press Ctrl+Enter) to install Julia. This may take a minute or so.\n",
23
- "4. Continue to the next section.\n",
24
- "\n",
25
- "_Notes_:\n",
26
- "* If your Colab Runtime gets reset (e.g., due to inactivity), repeat steps 3, 4.\n",
27
- "* After installation, if you want to change the Julia version or activate/deactivate the GPU, you will need to reset the Runtime: _Runtime_ > _Delete and disconnect runtime_ and repeat steps 2-4."
28
- ]
29
- },
30
- {
31
- "cell_type": "markdown",
32
- "metadata": {
33
- "id": "COndi88gbDgO"
34
- },
35
- "source": [
36
- "**Run the following code to install Julia**"
37
- ]
38
- },
39
- {
40
- "cell_type": "code",
41
- "execution_count": null,
42
- "metadata": {
43
- "colab": {
44
- "base_uri": "https://localhost:8080/"
45
- },
46
- "id": "GIeFXS0F0zww",
47
- "outputId": "5399ed75-f77f-47c5-e53b-4b2f231f2839"
48
- },
49
- "outputs": [],
50
- "source": [
51
- "!curl -fsSL https://install.julialang.org | sh -s -- -y --default-channel 1.10"
52
- ]
53
- },
54
- {
55
- "cell_type": "code",
56
- "execution_count": null,
57
- "metadata": {
58
- "colab": {
59
- "base_uri": "https://localhost:8080/"
60
- },
61
- "id": "Iu9X-Y-YNmwM",
62
- "outputId": "ee14af65-043a-4ad6-efa0-3cdcc48a4eb8"
63
- },
64
- "outputs": [],
65
- "source": [
66
- "# Make julia available on PATH:\n",
67
- "!ln -s $HOME/.juliaup/bin/julia /usr/local/bin/julia\n",
68
- "\n",
69
- "# Test it works:\n",
70
- "!julia --version"
71
- ]
72
- },
73
- {
74
- "cell_type": "markdown",
75
- "metadata": {
76
- "id": "ORv1c6xvbDgV"
77
- },
78
- "source": [
79
- "Install PySR"
80
  ]
81
  },
82
  {
@@ -91,36 +32,23 @@
91
  },
92
  "outputs": [],
93
  "source": [
94
- "!pip install pysr && python -m pysr install"
95
  ]
96
  },
97
  {
98
  "cell_type": "markdown",
99
- "metadata": {
100
- "id": "etTMEV0wDqld"
101
- },
102
  "source": [
103
- "Colab's printing is non-standard, so we need to manually initialize Julia and redirect its printing. Normally, however, this is not required, and PySR will automatically start Julia during the first call to `.fit`:"
104
  ]
105
  },
106
  {
107
  "cell_type": "code",
108
  "execution_count": null,
109
- "metadata": {
110
- "id": "j666aOI8xWF_"
111
- },
112
  "outputs": [],
113
  "source": [
114
- "def init_colab_printing():\n",
115
- " from pysr.julia_helpers import init_julia\n",
116
- " from julia.tools import redirect_output_streams\n",
117
- "\n",
118
- " julia_kwargs = dict(optimize=3, threads=\"auto\", compiled_modules=False)\n",
119
- " init_julia(julia_kwargs=julia_kwargs)\n",
120
- " redirect_output_streams()\n",
121
- "\n",
122
- "\n",
123
- "init_colab_printing()"
124
  ]
125
  },
126
  {
@@ -129,7 +57,7 @@
129
  "id": "qeCPKd9wldEK"
130
  },
131
  "source": [
132
- "Now, let's import all of our libraries:"
133
  ]
134
  },
135
  {
@@ -233,7 +161,7 @@
233
  " niterations=30,\n",
234
  " binary_operators=[\"+\", \"*\"],\n",
235
  " unary_operators=[\"cos\", \"exp\", \"sin\"],\n",
236
- " **default_pysr_params\n",
237
  ")\n",
238
  "\n",
239
  "model.fit(X, y)"
@@ -648,7 +576,7 @@
648
  "outputs": [],
649
  "source": [
650
  "model = PySRRegressor(\n",
651
- " loss=\"myloss(x, y, w) = w * abs(x - y)\", # Custom loss function with weights.\n",
652
  " niterations=20,\n",
653
  " populations=20, # Use more populations\n",
654
  " binary_operators=[\"+\", \"*\"],\n",
@@ -815,26 +743,7 @@
815
  "where $p_i$ is the $i$th prime number, and $x$ is the input feature.\n",
816
  "\n",
817
  "Let's see if we can discover this using\n",
818
- "the [Primes.jl](https://github.com/JuliaMath/Primes.jl) package.\n",
819
- "\n",
820
- "First, let's get the Julia backend\n",
821
- "Here, we might choose to manually specify unlimited threads, `-O3`,\n",
822
- "and `compile_modules=False`, although this will only propagate if Julia has not yet started:"
823
- ]
824
- },
825
- {
826
- "cell_type": "code",
827
- "execution_count": null,
828
- "metadata": {
829
- "id": "yUC4BMuHG-KN"
830
- },
831
- "outputs": [],
832
- "source": [
833
- "import pysr\n",
834
- "\n",
835
- "jl = pysr.julia_helpers.init_julia(\n",
836
- " julia_kwargs=dict(optimize=3, threads=\"auto\", compiled_modules=False)\n",
837
- ")"
838
  ]
839
  },
840
  {
@@ -859,7 +768,9 @@
859
  },
860
  "outputs": [],
861
  "source": [
862
- "jl.eval(\n",
 
 
863
  " \"\"\"\n",
864
  "import Pkg\n",
865
  "Pkg.add(\"Primes\")\n",
@@ -885,7 +796,24 @@
885
  },
886
  "outputs": [],
887
  "source": [
888
- "jl.eval(\"import Primes\")"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
889
  ]
890
  },
891
  {
@@ -906,11 +834,11 @@
906
  },
907
  "outputs": [],
908
  "source": [
909
- "jl.eval(\n",
910
  " \"\"\"\n",
911
  "function p(i::T) where T\n",
912
  " if 0.5 < i < 1000\n",
913
- " return T(Primes.prime(round(Int, i)))\n",
914
  " else\n",
915
  " return T(NaN)\n",
916
  " end\n",
@@ -919,6 +847,29 @@
919
  ")"
920
  ]
921
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
922
  {
923
  "cell_type": "markdown",
924
  "metadata": {
@@ -947,7 +898,7 @@
947
  "(However, note that this version assumes 64-bit float input, rather than any input type `T`)\n",
948
  "\n",
949
  "Next, let's generate a list of primes for our test dataset.\n",
950
- "Since we are using PyJulia, we can just call `p` directly to do this:\n"
951
  ]
952
  },
953
  {
@@ -1382,7 +1333,7 @@
1382
  "\n",
1383
  "> **Warning**\n",
1384
  ">\n",
1385
- "> First, let's save the data, because sometimes PyTorch and PyJulia's C bindings interfere and cause the colab kernel to crash. If we need to restart, we can just load the data without having to retrain the network:"
1386
  ]
1387
  },
1388
  {
@@ -1413,7 +1364,7 @@
1413
  "id": "krhaNlwFG-KT"
1414
  },
1415
  "source": [
1416
- "We can now load the data, including after a crash (be sure to re-run the import cells at the top of this notebook, including the one that starts PyJulia)."
1417
  ]
1418
  },
1419
  {
@@ -1467,7 +1418,7 @@
1467
  "id": "1a738a33"
1468
  },
1469
  "source": [
1470
- "If this segfaults, restart the notebook, and run the initial imports and PyJulia part, but skip the PyTorch training. This is because PyTorch's C binding tends to interefere with PyJulia. You can then re-run the `pkl.load` cell to import the data."
1471
  ]
1472
  },
1473
  {
 
15
  "id": "tQ1r1bbb0yBv"
16
  },
17
  "source": [
 
18
  "## Instructions\n",
19
  "1. Work on a copy of this notebook: _File_ > _Save a copy in Drive_ (you will need a Google account).\n",
20
+ "2. (Optional) If you would like to do the deep learning component of this tutorial, turn on the GPU with Edit->Notebook settings->Hardware accelerator->GPU\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  ]
22
  },
23
  {
 
32
  },
33
  "outputs": [],
34
  "source": [
35
+ "!pip install -U pysr"
36
  ]
37
  },
38
  {
39
  "cell_type": "markdown",
40
+ "metadata": {},
 
 
41
  "source": [
42
+ "Julia and Julia dependencies are installed at first import:"
43
  ]
44
  },
45
  {
46
  "cell_type": "code",
47
  "execution_count": null,
48
+ "metadata": {},
 
 
49
  "outputs": [],
50
  "source": [
51
+ "import pysr"
 
 
 
 
 
 
 
 
 
52
  ]
53
  },
54
  {
 
57
  "id": "qeCPKd9wldEK"
58
  },
59
  "source": [
60
+ "Now, let's import everything else as well as the PySRRegressor:\n"
61
  ]
62
  },
63
  {
 
161
  " niterations=30,\n",
162
  " binary_operators=[\"+\", \"*\"],\n",
163
  " unary_operators=[\"cos\", \"exp\", \"sin\"],\n",
164
+ " **default_pysr_params,\n",
165
  ")\n",
166
  "\n",
167
  "model.fit(X, y)"
 
576
  "outputs": [],
577
  "source": [
578
  "model = PySRRegressor(\n",
579
+ " elementwise_loss=\"myloss(x, y, w) = w * abs(x - y)\", # Custom loss function with weights.\n",
580
  " niterations=20,\n",
581
  " populations=20, # Use more populations\n",
582
  " binary_operators=[\"+\", \"*\"],\n",
 
743
  "where $p_i$ is the $i$th prime number, and $x$ is the input feature.\n",
744
  "\n",
745
  "Let's see if we can discover this using\n",
746
+ "the [Primes.jl](https://github.com/JuliaMath/Primes.jl) package."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
747
  ]
748
  },
749
  {
 
768
  },
769
  "outputs": [],
770
  "source": [
771
+ "from pysr import jl\n",
772
+ "\n",
773
+ "jl.seval(\n",
774
  " \"\"\"\n",
775
  "import Pkg\n",
776
  "Pkg.add(\"Primes\")\n",
 
796
  },
797
  "outputs": [],
798
  "source": [
799
+ "jl.seval(\"using Primes: prime\")"
800
+ ]
801
+ },
802
+ {
803
+ "cell_type": "markdown",
804
+ "metadata": {},
805
+ "source": [
806
+ "Note that PySR should automatically load the `juliacall.ipython` extension for you,\n",
807
+ "which means that you can also execute Julia code in the notebook using the `%%julia` magic:"
808
+ ]
809
+ },
810
+ {
811
+ "cell_type": "code",
812
+ "execution_count": null,
813
+ "metadata": {},
814
+ "outputs": [],
815
+ "source": [
816
+ "%julia using Primes: prime"
817
  ]
818
  },
819
  {
 
834
  },
835
  "outputs": [],
836
  "source": [
837
+ "jl.seval(\n",
838
  " \"\"\"\n",
839
  "function p(i::T) where T\n",
840
  " if 0.5 < i < 1000\n",
841
+ " return T(prime(round(Int, i)))\n",
842
  " else\n",
843
  " return T(NaN)\n",
844
  " end\n",
 
847
  ")"
848
  ]
849
  },
850
+ {
851
+ "cell_type": "markdown",
852
+ "metadata": {},
853
+ "source": [
854
+ "Or, equivalently:"
855
+ ]
856
+ },
857
+ {
858
+ "cell_type": "code",
859
+ "execution_count": null,
860
+ "metadata": {},
861
+ "outputs": [],
862
+ "source": [
863
+ "%%julia\n",
864
+ "function p(i::T) where T\n",
865
+ " if 0.5 < i < 1000\n",
866
+ " return T(prime(round(Int, i)))\n",
867
+ " else\n",
868
+ " return T(NaN)\n",
869
+ " end\n",
870
+ "end"
871
+ ]
872
+ },
873
  {
874
  "cell_type": "markdown",
875
  "metadata": {
 
898
  "(However, note that this version assumes 64-bit float input, rather than any input type `T`)\n",
899
  "\n",
900
  "Next, let's generate a list of primes for our test dataset.\n",
901
+ "Since we are using juliacall, we can just call `p` directly to do this:\n"
902
  ]
903
  },
904
  {
 
1333
  "\n",
1334
  "> **Warning**\n",
1335
  ">\n",
1336
+ "> First, let's save the data, because sometimes PyTorch and juliacall's C bindings interfere and cause the colab kernel to crash. If we need to restart, we can just load the data without having to retrain the network:"
1337
  ]
1338
  },
1339
  {
 
1364
  "id": "krhaNlwFG-KT"
1365
  },
1366
  "source": [
1367
+ "We can now load the data, including after a crash (be sure to re-run the import cells at the top of this notebook, including the one that starts juliacall)."
1368
  ]
1369
  },
1370
  {
 
1418
  "id": "1a738a33"
1419
  },
1420
  "source": [
1421
+ "If this segfaults, restart the notebook, and run the initial imports and juliacall part, but skip the PyTorch training. This is because PyTorch's C binding tends to interfere with juliacall. You can then re-run the `pkl.load` cell to import the data."
1422
  ]
1423
  },
1424
  {
pyproject.toml CHANGED
@@ -1,2 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  [tool.isort]
2
  profile = "black"
 
1
+ [build-system]
2
+ requires = ["setuptools"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pysr"
7
+ version = "0.17.0"
8
+ authors = [
9
+ {name = "Miles Cranmer", email = "[email protected]"},
10
+ ]
11
+ description = "Simple and efficient symbolic regression"
12
+ readme = {file = "README.md", content-type = "text/markdown"}
13
+ license = {file = "LICENSE"}
14
+ requires-python = ">=3.7"
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Operating System :: OS Independent",
18
+ "License :: OSI Approved :: Apache Software License"
19
+ ]
20
+ dynamic = ["dependencies"]
21
+
22
+ [tool.setuptools]
23
+ packages = ["pysr", "pysr._cli", "pysr.test"]
24
+ include-package-data = false
25
+ package-data = {pysr = ["juliapkg.json"]}
26
+
27
+ [tool.setuptools.dynamic]
28
+ dependencies = {file = "requirements.txt"}
29
+
30
  [tool.isort]
31
  profile = "black"
pysr/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ version.py
pysr/__init__.py CHANGED
@@ -1,26 +1,23 @@
1
- import sys
2
- import warnings
3
-
4
- if sys.version_info >= (3, 12, 0):
5
- warnings.warn(
6
- "PySR experiences occassional segfaults with Python 3.12. "
7
- + "Please use an earlier version of Python with PySR until this issue is resolved."
8
- )
9
 
10
  from . import sklearn_monkeypatch
11
- from .deprecated import best, best_callable, best_row, best_tex, pysr
12
  from .export_jax import sympy2jax
13
  from .export_torch import sympy2torch
14
- from .feynman_problems import FeynmanProblem, Problem
15
- from .julia_helpers import install
16
  from .sr import PySRRegressor
 
 
17
  from .version import __version__
18
 
19
  __all__ = [
 
 
20
  "sklearn_monkeypatch",
21
  "sympy2jax",
22
  "sympy2torch",
23
- "FeynmanProblem",
24
  "Problem",
25
  "install",
26
  "PySRRegressor",
 
1
+ # This must be imported as early as possible to prevent
2
+ # library linking issues caused by numpy/pytorch/etc. importing
3
+ # old libraries:
4
+ from .julia_import import jl, SymbolicRegression # isort:skip
 
 
 
 
5
 
6
  from . import sklearn_monkeypatch
7
+ from .deprecated import best, best_callable, best_row, best_tex, install, pysr
8
  from .export_jax import sympy2jax
9
  from .export_torch import sympy2torch
 
 
10
  from .sr import PySRRegressor
11
+
12
+ # This file is created by setuptools_scm during the build process:
13
  from .version import __version__
14
 
15
  __all__ = [
16
+ "jl",
17
+ "SymbolicRegression",
18
  "sklearn_monkeypatch",
19
  "sympy2jax",
20
  "sympy2torch",
 
21
  "Problem",
22
  "install",
23
  "PySRRegressor",
pysr/__main__.py CHANGED
@@ -1,4 +1,4 @@
1
- from pysr._cli.main import pysr as _cli
2
 
3
  if __name__ == "__main__":
4
  _cli(prog_name="pysr")
 
1
+ from ._cli.main import pysr as _cli
2
 
3
  if __name__ == "__main__":
4
  _cli(prog_name="pysr")
pysr/_cli/main.py CHANGED
@@ -1,6 +1,17 @@
 
 
 
 
1
  import click
2
 
3
- from ..julia_helpers import install
 
 
 
 
 
 
 
4
 
5
 
6
  @click.group("pysr")
@@ -9,15 +20,13 @@ def pysr(context):
9
  ctx = context
10
 
11
 
12
- @pysr.command("install", help="Install Julia dependencies for PySR.")
13
  @click.option(
14
  "-p",
15
  "julia_project",
16
  "--project",
17
  default=None,
18
  type=str,
19
- help="Install in a specific Julia project (e.g., a local copy of SymbolicRegression.jl).",
20
- metavar="PROJECT_DIRECTORY",
21
  )
22
  @click.option("-q", "--quiet", is_flag=True, default=False, help="Disable logging.")
23
  @click.option(
@@ -25,14 +34,55 @@ def pysr(context):
25
  "precompile",
26
  flag_value=True,
27
  default=None,
28
- help="Force precompilation of Julia libraries.",
29
  )
30
  @click.option(
31
  "--no-precompile",
32
  "precompile",
33
  flag_value=False,
34
  default=None,
35
- help="Disable precompilation.",
36
  )
37
  def _install(julia_project, quiet, precompile):
38
- install(julia_project, quiet, precompile)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import unittest
3
+ import warnings
4
+
5
  import click
6
 
7
+ from ..test import (
8
+ get_runtests_cli,
9
+ runtests,
10
+ runtests_dev,
11
+ runtests_jax,
12
+ runtests_startup,
13
+ runtests_torch,
14
+ )
15
 
16
 
17
  @click.group("pysr")
 
20
  ctx = context
21
 
22
 
23
+ @pysr.command("install", help="DEPRECATED (dependencies are now installed at import).")
24
  @click.option(
25
  "-p",
26
  "julia_project",
27
  "--project",
28
  default=None,
29
  type=str,
 
 
30
  )
31
  @click.option("-q", "--quiet", is_flag=True, default=False, help="Disable logging.")
32
  @click.option(
 
34
  "precompile",
35
  flag_value=True,
36
  default=None,
 
37
  )
38
  @click.option(
39
  "--no-precompile",
40
  "precompile",
41
  flag_value=False,
42
  default=None,
 
43
  )
44
  def _install(julia_project, quiet, precompile):
45
+ warnings.warn(
46
+ "This command is deprecated. Julia dependencies are now installed at first import."
47
+ )
48
+
49
+
50
+ TEST_OPTIONS = {"main", "jax", "torch", "cli", "dev", "startup"}
51
+
52
+
53
+ @pysr.command("test")
54
+ @click.argument("tests", nargs=1)
55
+ def _tests(tests):
56
+ """Run parts of the PySR test suite.
57
+
58
+ Choose from main, jax, torch, cli, dev, and startup. You can give multiple tests, separated by commas.
59
+ """
60
+ test_cases = []
61
+ for test in tests.split(","):
62
+ if test == "main":
63
+ test_cases.extend(runtests(just_tests=True))
64
+ elif test == "jax":
65
+ test_cases.extend(runtests_jax(just_tests=True))
66
+ elif test == "torch":
67
+ test_cases.extend(runtests_torch(just_tests=True))
68
+ elif test == "cli":
69
+ runtests_cli = get_runtests_cli()
70
+ test_cases.extend(runtests_cli(just_tests=True))
71
+ elif test == "dev":
72
+ test_cases.extend(runtests_dev(just_tests=True))
73
+ elif test == "startup":
74
+ test_cases.extend(runtests_startup(just_tests=True))
75
+ else:
76
+ warnings.warn(f"Invalid test {test}. Skipping.")
77
+
78
+ loader = unittest.TestLoader()
79
+ suite = unittest.TestSuite()
80
+ for test_case in test_cases:
81
+ suite.addTests(loader.loadTestsFromTestCase(test_case))
82
+ runner = unittest.TextTestRunner()
83
+ results = runner.run(suite)
84
+ # Normally unittest would run this, but here we have
85
+ # to do it manually to get the exit code.
86
+
87
+ if not results.wasSuccessful():
88
+ sys.exit(1)
pysr/deprecated.py CHANGED
@@ -1,6 +1,27 @@
1
  """Various functions to deprecate features."""
2
  import warnings
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  def pysr(X, y, weights=None, **kwargs): # pragma: no cover
6
  from .sr import PySRRegressor
@@ -55,38 +76,28 @@ def best_callable(*args, **kwargs): # pragma: no cover
55
  )
56
 
57
 
58
- def make_deprecated_kwargs_for_pysr_regressor():
59
- """Create dict of deprecated kwargs."""
60
-
61
- deprecation_string = """
62
- fractionReplaced => fraction_replaced
63
- fractionReplacedHof => fraction_replaced_hof
64
- npop => population_size
65
- hofMigration => hof_migration
66
- shouldOptimizeConstants => should_optimize_constants
67
- weightAddNode => weight_add_node
68
- weightDeleteNode => weight_delete_node
69
- weightDoNothing => weight_do_nothing
70
- weightInsertNode => weight_insert_node
71
- weightMutateConstant => weight_mutate_constant
72
- weightMutateOperator => weight_mutate_operator
73
- weightSwapOperands => weight_swap_operands
74
- weightRandomize => weight_randomize
75
- weightSimplify => weight_simplify
76
- crossoverProbability => crossover_probability
77
- perturbationFactor => perturbation_factor
78
- batchSize => batch_size
79
- warmupMaxsizeBy => warmup_maxsize_by
80
- useFrequency => use_frequency
81
- useFrequencyInTournament => use_frequency_in_tournament
82
- """
83
- # Turn this into a dict:
84
- deprecated_kwargs = {}
85
- for line in deprecation_string.splitlines():
86
- line = line.replace(" ", "")
87
- if line == "":
88
- continue
89
- old, new = line.split("=>")
90
- deprecated_kwargs[old] = new
91
-
92
- return deprecated_kwargs
 
1
  """Various functions to deprecate features."""
2
  import warnings
3
 
4
+ from .julia_import import jl
5
+
6
+
7
+ def install(*args, **kwargs):
8
+ del args, kwargs
9
+ warnings.warn(
10
+ "The `install` function has been removed. "
11
+ "PySR now uses the `juliacall` package to install its dependencies automatically at import time. ",
12
+ FutureWarning,
13
+ )
14
+
15
+
16
+ def init_julia(*args, **kwargs):
17
+ del args, kwargs
18
+ warnings.warn(
19
+ "The `init_julia` function has been removed. "
20
+ "Julia is now initialized automatically at import time.",
21
+ FutureWarning,
22
+ )
23
+ return jl
24
+
25
 
26
  def pysr(X, y, weights=None, **kwargs): # pragma: no cover
27
  from .sr import PySRRegressor
 
76
  )
77
 
78
 
79
+ DEPRECATED_KWARGS = {
80
+ "fractionReplaced": "fraction_replaced",
81
+ "fractionReplacedHof": "fraction_replaced_hof",
82
+ "npop": "population_size",
83
+ "hofMigration": "hof_migration",
84
+ "shouldOptimizeConstants": "should_optimize_constants",
85
+ "weightAddNode": "weight_add_node",
86
+ "weightDeleteNode": "weight_delete_node",
87
+ "weightDoNothing": "weight_do_nothing",
88
+ "weightInsertNode": "weight_insert_node",
89
+ "weightMutateConstant": "weight_mutate_constant",
90
+ "weightMutateOperator": "weight_mutate_operator",
91
+ "weightSwapOperands": "weight_swap_operands",
92
+ "weightRandomize": "weight_randomize",
93
+ "weightSimplify": "weight_simplify",
94
+ "crossoverProbability": "crossover_probability",
95
+ "perturbationFactor": "perturbation_factor",
96
+ "batchSize": "batch_size",
97
+ "warmupMaxsizeBy": "warmup_maxsize_by",
98
+ "useFrequency": "use_frequency",
99
+ "useFrequencyInTournament": "use_frequency_in_tournament",
100
+ "ncyclesperiteration": "ncycles_per_iteration",
101
+ "loss": "elementwise_loss",
102
+ "full_objective": "loss_function",
103
+ }
 
 
 
 
 
 
 
 
 
 
pysr/feynman_problems.py DELETED
@@ -1,176 +0,0 @@
1
- import csv
2
- from functools import partial
3
- from pathlib import Path
4
-
5
- import numpy as np
6
-
7
- from .deprecated import best, pysr
8
-
9
- PKG_DIR = Path(__file__).parents[1]
10
- FEYNMAN_DATASET = PKG_DIR / "datasets" / "FeynmanEquations.csv"
11
-
12
-
13
- class Problem:
14
- """
15
- Problem API to work with PySR.
16
-
17
- Has attributes: X, y as pysr accepts, form which is a string representing the correct equation and variable_names
18
-
19
- Should be able to call pysr(problem.X, problem.y, var_names=problem.var_names) and have it work
20
- """
21
-
22
- def __init__(self, X, y, form=None, variable_names=None):
23
- self.X = X
24
- self.y = y
25
- self.form = form
26
- self.variable_names = variable_names
27
-
28
-
29
- class FeynmanProblem(Problem):
30
- """
31
- Stores the data for the problems from the 100 Feynman Equations on Physics.
32
- This is the benchmark used in the AI Feynman Paper
33
- """
34
-
35
- def __init__(self, row, gen=False, dp=500):
36
- """
37
- row: a row read as a dict from the FeynmanEquations dataset provided in the datasets folder of the repo
38
- gen: If true the problem will have dp X and y values randomly generated else they will be None
39
- """
40
- self.eq_id = row["Filename"]
41
- self.n_vars = int(row["# variables"])
42
- super(FeynmanProblem, self).__init__(
43
- None,
44
- None,
45
- form=row["Formula"],
46
- variable_names=[row[f"v{i + 1}_name"] for i in range(self.n_vars)],
47
- )
48
- self.low = [float(row[f"v{i+1}_low"]) for i in range(self.n_vars)]
49
- self.high = [float(row[f"v{i+1}_high"]) for i in range(self.n_vars)]
50
- self.dp = dp
51
- if gen:
52
- self.X = np.random.uniform(0.01, 25, size=(self.dp, self.n_vars))
53
- d = {}
54
- for var in range(len(self.variable_names)):
55
- d[self.variable_names[var]] = self.X[:, var]
56
- d["exp"] = np.exp
57
- d["sqrt"] = np.sqrt
58
- d["pi"] = np.pi
59
- d["cos"] = np.cos
60
- d["sin"] = np.sin
61
- d["tan"] = np.tan
62
- d["tanh"] = np.tanh
63
- d["ln"] = np.log
64
- d["log"] = np.log # Quite sure the Feynman dataset has no base 10 logs
65
- d["arcsin"] = np.arcsin
66
- self.y = eval(self.form, d)
67
-
68
- def __str__(self):
69
- return f"Feynman Equation: {self.eq_id}|Form: {self.form}"
70
-
71
- def __repr__(self):
72
- return str(self)
73
-
74
-
75
- def mk_problems(first=100, gen=False, dp=500, data_dir=FEYNMAN_DATASET):
76
- """
77
-
78
- first: the first "first" equations from the dataset will be made into problems
79
- data_dir: the path pointing to the Feynman Equations csv
80
- returns: list of FeynmanProblems
81
- """
82
- ret = []
83
- with open(data_dir) as csvfile:
84
- reader = csv.DictReader(csvfile)
85
- for i, row in enumerate(reader):
86
- if i > first:
87
- break
88
- if row["Filename"] == "":
89
- continue
90
- p = FeynmanProblem(row, gen=gen, dp=dp)
91
- ret.append(p)
92
- return ret
93
-
94
-
95
- def run_on_problem(problem, verbosity=0, multiprocessing=True):
96
- """
97
- Takes in a problem and returns a tuple: (equations, best predicted equation, actual equation)
98
- """
99
- from time import time
100
-
101
- starting = time()
102
- equations = pysr(
103
- problem.X,
104
- problem.y,
105
- variable_names=problem.variable_names,
106
- verbosity=verbosity,
107
- )
108
- timing = time() - starting
109
- others = {"time": timing, "problem": problem}
110
- if not multiprocessing:
111
- others["equations"] = equations
112
- return str(best(equations)), problem.form, others
113
-
114
-
115
- def do_feynman_experiments_parallel(
116
- first=100,
117
- verbosity=0,
118
- dp=500,
119
- output_file_path="FeynmanExperiment.csv",
120
- data_dir=FEYNMAN_DATASET,
121
- ):
122
- import multiprocessing as mp
123
-
124
- from tqdm import tqdm
125
-
126
- problems = mk_problems(first=first, gen=True, dp=dp, data_dir=data_dir)
127
- ids = []
128
- predictions = []
129
- true_equations = []
130
- time_takens = []
131
- pool = mp.Pool()
132
- results = []
133
- with tqdm(total=len(problems)) as pbar:
134
- f = partial(run_on_problem, verbosity=verbosity)
135
- for i, res in enumerate(pool.imap(f, problems)):
136
- results.append(res)
137
- pbar.update()
138
- for res in results:
139
- prediction, true_equation, others = res
140
- problem = others["problem"]
141
- ids.append(problem.eq_id)
142
- predictions.append(prediction)
143
- true_equations.append(true_equation)
144
- time_takens.append(others["time"])
145
- with open(output_file_path, "a") as f:
146
- writer = csv.writer(f, delimiter=",")
147
- writer.writerow(["ID", "Predicted", "True", "Time"])
148
- for i in range(len(ids)):
149
- writer.writerow([ids[i], predictions[i], true_equations[i], time_takens[i]])
150
-
151
-
152
- def do_feynman_experiments(
153
- first=100,
154
- verbosity=0,
155
- dp=500,
156
- output_file_path="FeynmanExperiment.csv",
157
- data_dir=FEYNMAN_DATASET,
158
- ):
159
- from tqdm import tqdm
160
-
161
- problems = mk_problems(first=first, gen=True, dp=dp, data_dir=data_dir)
162
- ids = []
163
- predictions = []
164
- true_equations = []
165
- time_takens = []
166
- for problem in tqdm(problems):
167
- prediction, true_equation, others = run_on_problem(problem, verbosity)
168
- ids.append(problem.eq_id)
169
- predictions.append(prediction)
170
- true_equations.append(true_equation)
171
- time_takens.append(others["time"])
172
- with open(output_file_path, "a") as f:
173
- writer = csv.writer(f, delimiter=",")
174
- writer.writerow(["ID", "Predicted", "True", "Time"])
175
- for i in range(len(ids)):
176
- writer.writerow([ids[i], predictions[i], true_equations[i], time_takens[i]])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pysr/julia_helpers.py CHANGED
@@ -1,284 +1,18 @@
1
  """Functions for initializing the Julia environment and installing deps."""
2
- import os
3
- import subprocess
4
- import sys
5
- import warnings
6
- from pathlib import Path
7
 
8
- from julia.api import JuliaError
 
9
 
10
- from .version import __symbolic_regression_jl_version__, __version__
 
11
 
12
- juliainfo = None
13
- julia_initialized = False
14
- julia_kwargs_at_initialization = None
15
- julia_activated_env = None
16
 
 
 
17
 
18
- def _load_juliainfo():
19
- """Execute julia.core.JuliaInfo.load(), and store as juliainfo."""
20
- global juliainfo
21
-
22
- if juliainfo is None:
23
- from julia.core import JuliaInfo
24
-
25
- try:
26
- juliainfo = JuliaInfo.load(julia="julia")
27
- except FileNotFoundError:
28
- env_path = os.environ["PATH"]
29
- raise FileNotFoundError(
30
- f"Julia is not installed in your PATH. Please install Julia and add it to your PATH.\n\nCurrent PATH: {env_path}",
31
- )
32
-
33
- return juliainfo
34
-
35
-
36
- def _get_julia_env_dir():
37
- # Have to manually get env dir:
38
- try:
39
- julia_env_dir_str = subprocess.run(
40
- ["julia", "-e using Pkg; print(Pkg.envdir())"],
41
- capture_output=True,
42
- env=os.environ,
43
- ).stdout.decode()
44
- except FileNotFoundError:
45
- env_path = os.environ["PATH"]
46
- raise FileNotFoundError(
47
- f"Julia is not installed in your PATH. Please install Julia and add it to your PATH.\n\nCurrent PATH: {env_path}",
48
- )
49
- return Path(julia_env_dir_str)
50
-
51
-
52
- def _set_julia_project_env(julia_project, is_shared):
53
- if is_shared:
54
- if is_julia_version_greater_eq(version=(1, 7, 0)):
55
- os.environ["JULIA_PROJECT"] = "@" + str(julia_project)
56
- else:
57
- julia_env_dir = _get_julia_env_dir()
58
- os.environ["JULIA_PROJECT"] = str(julia_env_dir / julia_project)
59
- else:
60
- os.environ["JULIA_PROJECT"] = str(julia_project)
61
-
62
-
63
- def _get_io_arg(quiet):
64
- io = "devnull" if quiet else "stderr"
65
- io_arg = f"io={io}" if is_julia_version_greater_eq(version=(1, 6, 0)) else ""
66
- return io_arg
67
-
68
-
69
- def install(julia_project=None, quiet=False, precompile=None): # pragma: no cover
70
- """
71
- Install PyCall.jl and all required dependencies for SymbolicRegression.jl.
72
-
73
- Also updates the local Julia registry.
74
- """
75
- import julia
76
-
77
- _julia_version_assertion()
78
- # Set JULIA_PROJECT so that we install in the pysr environment
79
- processed_julia_project, is_shared = _process_julia_project(julia_project)
80
- _set_julia_project_env(processed_julia_project, is_shared)
81
-
82
- if precompile == False:
83
- os.environ["JULIA_PKG_PRECOMPILE_AUTO"] = "0"
84
-
85
- try:
86
- julia.install(quiet=quiet)
87
- except julia.tools.PyCallInstallError:
88
- # Attempt to reset PyCall.jl's build:
89
- subprocess.run(
90
- [
91
- "julia",
92
- "-e",
93
- f'ENV["PYTHON"] = "{sys.executable}"; import Pkg; Pkg.build("PyCall")',
94
- ],
95
- )
96
- # Try installing again:
97
- try:
98
- julia.install(quiet=quiet)
99
- except julia.tools.PyCallInstallError:
100
- warnings.warn(
101
- "PyCall.jl failed to install on second attempt. "
102
- + "Please consult the GitHub issue "
103
- + "https://github.com/MilesCranmer/PySR/issues/257 "
104
- + "for advice on fixing this."
105
- )
106
-
107
- Main, init_log = init_julia(julia_project, quiet=quiet, return_aux=True)
108
- io_arg = _get_io_arg(quiet)
109
-
110
- if precompile is None:
111
- precompile = init_log["compiled_modules"]
112
-
113
- if not precompile:
114
- Main.eval('ENV["JULIA_PKG_PRECOMPILE_AUTO"] = 0')
115
-
116
- if is_shared:
117
- # Install SymbolicRegression.jl:
118
- _add_sr_to_julia_project(Main, io_arg)
119
-
120
- Main.eval("using Pkg")
121
- Main.eval(f"Pkg.instantiate({io_arg})")
122
-
123
- if precompile:
124
- Main.eval(f"Pkg.precompile({io_arg})")
125
-
126
- if not quiet:
127
- warnings.warn(
128
- "It is recommended to restart Python after installing PySR's dependencies,"
129
- " so that the Julia environment is properly initialized."
130
- )
131
-
132
-
133
- def _import_error():
134
- return """
135
- Required dependencies are not installed or built. Run the following command in your terminal:
136
- python3 -m pysr install
137
- """
138
-
139
-
140
- def _process_julia_project(julia_project):
141
- if julia_project is None:
142
- is_shared = True
143
- processed_julia_project = f"pysr-{__version__}"
144
- elif julia_project[0] == "@":
145
- is_shared = True
146
- processed_julia_project = julia_project[1:]
147
- else:
148
- is_shared = False
149
- processed_julia_project = Path(julia_project)
150
- return processed_julia_project, is_shared
151
-
152
-
153
- def is_julia_version_greater_eq(juliainfo=None, version=(1, 6, 0)):
154
- """Check if Julia version is greater than specified version."""
155
- if juliainfo is None:
156
- juliainfo = _load_juliainfo()
157
- current_version = (
158
- juliainfo.version_major,
159
- juliainfo.version_minor,
160
- juliainfo.version_patch,
161
- )
162
- return current_version >= version
163
-
164
-
165
- def _check_for_conflicting_libraries(): # pragma: no cover
166
- """Check whether there are conflicting modules, and display warnings."""
167
- # See https://github.com/pytorch/pytorch/issues/78829: importing
168
- # pytorch before running `pysr.fit` causes a segfault.
169
- torch_is_loaded = "torch" in sys.modules
170
- if torch_is_loaded:
171
- warnings.warn(
172
- "`torch` was loaded before the Julia instance started. "
173
- "This may cause a segfault when running `PySRRegressor.fit`. "
174
- "To avoid this, please run `pysr.julia_helpers.init_julia()` *before* "
175
- "importing `torch`. "
176
- "For updates, see https://github.com/pytorch/pytorch/issues/78829"
177
- )
178
-
179
-
180
- def init_julia(julia_project=None, quiet=False, julia_kwargs=None, return_aux=False):
181
- """Initialize julia binary, turning off compiled modules if needed."""
182
- global julia_initialized
183
- global julia_kwargs_at_initialization
184
- global julia_activated_env
185
-
186
- if not julia_initialized:
187
- _check_for_conflicting_libraries()
188
-
189
- if julia_kwargs is None:
190
- julia_kwargs = {"optimize": 3}
191
-
192
- from julia.core import JuliaInfo, UnsupportedPythonError
193
-
194
- _julia_version_assertion()
195
- processed_julia_project, is_shared = _process_julia_project(julia_project)
196
- _set_julia_project_env(processed_julia_project, is_shared)
197
-
198
- try:
199
- info = JuliaInfo.load(julia="julia")
200
- except FileNotFoundError:
201
- env_path = os.environ["PATH"]
202
- raise FileNotFoundError(
203
- f"Julia is not installed in your PATH. Please install Julia and add it to your PATH.\n\nCurrent PATH: {env_path}",
204
- )
205
-
206
- if not info.is_pycall_built():
207
- raise ImportError(_import_error())
208
-
209
- from julia.core import Julia
210
-
211
- try:
212
- Julia(**julia_kwargs)
213
- except UnsupportedPythonError:
214
- # Static python binary, so we turn off pre-compiled modules.
215
- julia_kwargs = {**julia_kwargs, "compiled_modules": False}
216
- Julia(**julia_kwargs)
217
- warnings.warn(
218
- "Your system's Python library is static (e.g., conda), so precompilation will be turned off. For a dynamic library, try using `pyenv` and installing with `--enable-shared`: https://github.com/pyenv/pyenv/blob/master/plugins/python-build/README.md#building-with---enable-shared."
219
- )
220
-
221
- using_compiled_modules = (not "compiled_modules" in julia_kwargs) or julia_kwargs[
222
- "compiled_modules"
223
- ]
224
-
225
- from julia import Main as _Main
226
-
227
- Main = _Main
228
-
229
- if julia_activated_env is None:
230
- julia_activated_env = processed_julia_project
231
-
232
- if julia_initialized and julia_kwargs_at_initialization is not None:
233
- # Check if the kwargs are the same as the previous initialization
234
- init_set = set(julia_kwargs_at_initialization.items())
235
- new_set = set(julia_kwargs.items())
236
- set_diff = new_set - init_set
237
- # Remove the `compiled_modules` key, since it is not a user-specified kwarg:
238
- set_diff = {k: v for k, v in set_diff if k != "compiled_modules"}
239
- if len(set_diff) > 0:
240
- warnings.warn(
241
- "Julia has already started. The new Julia options "
242
- + str(set_diff)
243
- + " will be ignored."
244
- )
245
-
246
- if julia_initialized and julia_activated_env != processed_julia_project:
247
- Main.eval("using Pkg")
248
-
249
- io_arg = _get_io_arg(quiet)
250
- # Can't pass IO to Julia call as it evaluates to PyObject, so just directly
251
- # use Main.eval:
252
- Main.eval(
253
- f'Pkg.activate("{_escape_filename(processed_julia_project)}",'
254
- f"shared = Bool({int(is_shared)}), "
255
- f"{io_arg})"
256
- )
257
-
258
- julia_activated_env = processed_julia_project
259
-
260
- if not julia_initialized:
261
- julia_kwargs_at_initialization = julia_kwargs
262
-
263
- julia_initialized = True
264
- if return_aux:
265
- return Main, {"compiled_modules": using_compiled_modules}
266
- return Main
267
-
268
-
269
- def _add_sr_to_julia_project(Main, io_arg):
270
- Main.eval("using Pkg")
271
- Main.eval("Pkg.Registry.update()")
272
- Main.sr_spec = Main.PackageSpec(
273
- name="SymbolicRegression",
274
- url="https://github.com/MilesCranmer/SymbolicRegression.jl",
275
- rev="v" + __symbolic_regression_jl_version__,
276
- )
277
- Main.clustermanagers_spec = Main.PackageSpec(
278
- name="ClusterManagers",
279
- version="0.4",
280
- )
281
- Main.eval(f"Pkg.add([sr_spec, clustermanagers_spec], {io_arg})")
282
 
283
 
284
  def _escape_filename(filename):
@@ -288,60 +22,27 @@ def _escape_filename(filename):
288
  return str_repr
289
 
290
 
291
- def _julia_version_assertion():
292
- if not is_julia_version_greater_eq(version=(1, 6, 0)):
293
- raise NotImplementedError(
294
- "PySR requires Julia 1.6.0 or greater. "
295
- "Please update your Julia installation."
296
- )
297
-
298
-
299
- def _backend_version_assertion(Main):
300
- try:
301
- backend_version = Main.eval("string(SymbolicRegression.PACKAGE_VERSION)")
302
- expected_backend_version = __symbolic_regression_jl_version__
303
- if backend_version != expected_backend_version: # pragma: no cover
304
- warnings.warn(
305
- f"PySR backend (SymbolicRegression.jl) version {backend_version} "
306
- f"does not match expected version {expected_backend_version}. "
307
- "Things may break. "
308
- "Please update your PySR installation with "
309
- "`python3 -m pysr install`."
310
- )
311
- except JuliaError: # pragma: no cover
312
- warnings.warn(
313
- "You seem to have an outdated version of SymbolicRegression.jl. "
314
- "Things may break. "
315
- "Please update your PySR installation with "
316
- "`python3 -m pysr install`."
317
- )
318
-
319
-
320
- def _load_cluster_manager(Main, cluster_manager):
321
- Main.eval(f"import ClusterManagers: addprocs_{cluster_manager}")
322
- return Main.eval(f"addprocs_{cluster_manager}")
323
-
324
 
325
- def _update_julia_project(Main, is_shared, io_arg):
326
- try:
327
- if is_shared:
328
- _add_sr_to_julia_project(Main, io_arg)
329
- Main.eval("using Pkg")
330
- Main.eval(f"Pkg.resolve({io_arg})")
331
- except (JuliaError, RuntimeError) as e:
332
- raise ImportError(_import_error()) from e
333
 
 
 
 
 
334
 
335
- def _load_backend(Main):
336
- try:
337
- # Load namespace, so that various internal operators work:
338
- Main.eval("using SymbolicRegression")
339
- except (JuliaError, RuntimeError) as e:
340
- raise ImportError(_import_error()) from e
341
 
342
- _backend_version_assertion(Main)
 
 
 
343
 
344
- # Load Julia package SymbolicRegression.jl
345
- from julia import SymbolicRegression
346
 
347
- return SymbolicRegression
 
 
 
 
 
 
 
1
  """Functions for initializing the Julia environment and installing deps."""
 
 
 
 
 
2
 
3
+ import numpy as np
4
+ from juliacall import convert as jl_convert # type: ignore
5
 
6
+ from .deprecated import init_julia, install
7
+ from .julia_import import jl
8
 
9
+ jl.seval("using Serialization: Serialization")
10
+ jl.seval("using PythonCall: PythonCall")
 
 
11
 
12
+ Serialization = jl.Serialization
13
+ PythonCall = jl.PythonCall
14
 
15
+ jl.seval("using SymbolicRegression: plus, sub, mult, div, pow")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
 
18
  def _escape_filename(filename):
 
22
  return str_repr
23
 
24
 
25
+ def _load_cluster_manager(cluster_manager):
26
+ jl.seval(f"using ClusterManagers: addprocs_{cluster_manager}")
27
+ return jl.seval(f"addprocs_{cluster_manager}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
 
 
 
 
 
 
 
 
29
 
30
+ def jl_array(x):
31
+ if x is None:
32
+ return None
33
+ return jl_convert(jl.Array, x)
34
 
 
 
 
 
 
 
35
 
36
+ def jl_serialize(obj):
37
+ buf = jl.IOBuffer()
38
+ Serialization.serialize(buf, obj)
39
+ return np.array(jl.take_b(buf))
40
 
 
 
41
 
42
+ def jl_deserialize(s):
43
+ if s is None:
44
+ return s
45
+ buf = jl.IOBuffer()
46
+ jl.write(buf, jl_array(s))
47
+ jl.seekstart(buf)
48
+ return Serialization.deserialize(buf)
pysr/julia_import.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import warnings
4
+
5
+ # Check if JuliaCall is already loaded, and if so, warn the user
6
+ # about the relevant environment variables. If not loaded,
7
+ # set up sensible defaults.
8
+ if "juliacall" in sys.modules:
9
+ warnings.warn(
10
+ "juliacall module already imported. "
11
+ "Make sure that you have set the environment variable `PYTHON_JULIACALL_HANDLE_SIGNALS=yes` to avoid segfaults. "
12
+ "Also note that PySR will not be able to configure `PYTHON_JULIACALL_THREADS` or `PYTHON_JULIACALL_OPTLEVEL` for you."
13
+ )
14
+ else:
15
+ # Required to avoid segfaults (https://juliapy.github.io/PythonCall.jl/dev/faq/)
16
+ if os.environ.get("PYTHON_JULIACALL_HANDLE_SIGNALS", "yes") != "yes":
17
+ warnings.warn(
18
+ "PYTHON_JULIACALL_HANDLE_SIGNALS environment variable is set to something other than 'yes' or ''. "
19
+ + "You will experience segfaults if running with multithreading."
20
+ )
21
+
22
+ if os.environ.get("PYTHON_JULIACALL_THREADS", "auto") != "auto":
23
+ warnings.warn(
24
+ "PYTHON_JULIACALL_THREADS environment variable is set to something other than 'auto', "
25
+ "so PySR was not able to set it. You may wish to set it to `'auto'` for full use "
26
+ "of your CPU."
27
+ )
28
+
29
+ # TODO: Remove these when juliapkg lets you specify this
30
+ for k, default in (
31
+ ("PYTHON_JULIACALL_HANDLE_SIGNALS", "yes"),
32
+ ("PYTHON_JULIACALL_THREADS", "auto"),
33
+ ("PYTHON_JULIACALL_OPTLEVEL", "3"),
34
+ ):
35
+ os.environ[k] = os.environ.get(k, default)
36
+
37
+
38
+ from juliacall import Main as jl # type: ignore
39
+
40
+ # Overwrite the seval function to use Meta.parseall
41
+ # instead of Meta.parse.
42
+ jl.seval("using PythonCall: PythonCall, Py, pyconvert")
43
+ jl.seval(
44
+ """function PythonCall.pyjlmodule_seval(self::Module, expr::Py)
45
+ e = Meta.parseall(strip(pyconvert(String, expr)))
46
+ Py(Base.eval(self, e))
47
+ end"""
48
+ )
49
+ # ^TODO: Overwrite this once PythonCall.jl is updated:
50
+
51
+ jl_version = (jl.VERSION.major, jl.VERSION.minor, jl.VERSION.patch)
52
+
53
+ # Next, automatically load the juliacall extension if we're in a Jupyter notebook
54
+ autoload_extensions = os.environ.get("PYSR_AUTOLOAD_EXTENSIONS", "yes")
55
+ if autoload_extensions in {"yes", ""} and jl_version >= (1, 9, 0):
56
+ try:
57
+ get_ipython = sys.modules["IPython"].get_ipython
58
+
59
+ if "IPKernelApp" not in get_ipython().config:
60
+ raise ImportError("console")
61
+
62
+ print(
63
+ "Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable."
64
+ )
65
+
66
+ # TODO: Turn this off if juliacall does this automatically
67
+ get_ipython().run_line_magic("load_ext", "juliacall")
68
+ except Exception:
69
+ pass
70
+ elif autoload_extensions not in {"no", "yes", ""}:
71
+ warnings.warn(
72
+ "PYSR_AUTOLOAD_EXTENSIONS environment variable is set to something other than 'yes' or 'no' or ''."
73
+ )
74
+
75
+ jl.seval("using SymbolicRegression")
76
+ SymbolicRegression = jl.SymbolicRegression
pysr/juliapkg.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "julia": "1.6",
3
+ "packages": {
4
+ "SymbolicRegression": {
5
+ "uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
6
+ "version": "=0.23.1"
7
+ },
8
+ "ClusterManagers": {
9
+ "uuid": "34f1f09b-3a8b-5176-ab39-66d58a4d544e",
10
+ "version": "0.4"
11
+ },
12
+ "Serialization": {
13
+ "uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",
14
+ "version": "1"
15
+ },
16
+ "Zygote": {
17
+ "uuid": "e88e6eb3-aa80-5325-afca-941959d7151f",
18
+ "version": "0.6"
19
+ }
20
+ }
21
+ }
pysr/param_groupings.yml CHANGED
@@ -8,10 +8,10 @@
8
  - niterations
9
  - populations
10
  - population_size
11
- - ncyclesperiteration
12
  - The Objective:
13
- - loss
14
- - full_objective
15
  - model_selection
16
  - dimensional_constraint_penalty
17
  - Working with Complexities:
@@ -88,9 +88,7 @@
88
  - temp_equation_file
89
  - tempdir
90
  - delete_tempfiles
91
- - julia_project
92
  - update
93
- - julia_kwargs
94
  - Exporting the Results:
95
  - equation_file
96
  - output_jax_format
 
8
  - niterations
9
  - populations
10
  - population_size
11
+ - ncycles_per_iteration
12
  - The Objective:
13
+ - elementwise_loss
14
+ - loss_function
15
  - model_selection
16
  - dimensional_constraint_penalty
17
  - Working with Complexities:
 
88
  - temp_equation_file
89
  - tempdir
90
  - delete_tempfiles
 
91
  - update
 
92
  - Exporting the Results:
93
  - equation_file
94
  - output_jax_format
pysr/sklearn_monkeypatch.py CHANGED
@@ -9,5 +9,5 @@ def _ensure_no_complex_data(*args, **kwargs):
9
 
10
  try:
11
  validation._ensure_no_complex_data = _ensure_no_complex_data
12
- except AttributeError:
13
  ...
 
9
 
10
  try:
11
  validation._ensure_no_complex_data = _ensure_no_complex_data
12
+ except AttributeError: # pragma: no cover
13
  ...
pysr/sr.py CHANGED
@@ -25,7 +25,7 @@ from sklearn.utils import check_array, check_consistent_length, check_random_sta
25
  from sklearn.utils.validation import _check_feature_names_in, check_is_fitted
26
 
27
  from .denoising import denoise, multi_denoise
28
- from .deprecated import make_deprecated_kwargs_for_pysr_regressor
29
  from .export_jax import sympy2jax
30
  from .export_latex import sympy2latex, sympy2latextable, sympy2multilatextable
31
  from .export_numpy import sympy2numpy
@@ -33,14 +33,14 @@ from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2
33
  from .export_torch import sympy2torch
34
  from .feature_selection import run_feature_selection
35
  from .julia_helpers import (
 
36
  _escape_filename,
37
- _load_backend,
38
  _load_cluster_manager,
39
- _process_julia_project,
40
- _update_julia_project,
41
- init_julia,
42
- is_julia_version_greater_eq,
43
  )
 
44
  from .utils import (
45
  _csv_filename_to_pkl_filename,
46
  _preprocess_julia_floats,
@@ -48,8 +48,6 @@ from .utils import (
48
  _subscriptify,
49
  )
50
 
51
- Main = None # TODO: Rename to more descriptive name like "julia_runtime"
52
-
53
  already_ran = False
54
 
55
 
@@ -92,7 +90,6 @@ def _process_constraints(binary_operators, unary_operators, constraints):
92
  def _maybe_create_inline_operators(
93
  binary_operators, unary_operators, extra_sympy_mappings
94
  ):
95
- global Main
96
  binary_operators = binary_operators.copy()
97
  unary_operators = unary_operators.copy()
98
  for op_list in [binary_operators, unary_operators]:
@@ -100,7 +97,7 @@ def _maybe_create_inline_operators(
100
  is_user_defined_operator = "(" in op
101
 
102
  if is_user_defined_operator:
103
- Main.eval(op)
104
  # Cut off from the first non-alphanumeric char:
105
  first_non_char = [j for j, char in enumerate(op) if char == "("][0]
106
  function_name = op[:first_non_char]
@@ -271,7 +268,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
271
  arguments are treated the same way, and the max of each
272
  argument is constrained.
273
  Default is `None`.
274
- loss : str
275
  String of Julia code specifying an elementwise loss function.
276
  Can either be a loss from LossFunctions.jl, or your own loss
277
  written as a function. Examples of custom written losses include:
@@ -287,11 +284,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
287
  `ModifiedHuberLoss()`, `L2MarginLoss()`, `ExpLoss()`,
288
  `SigmoidLoss()`, `DWDMarginLoss(q)`.
289
  Default is `"L2DistLoss()"`.
290
- full_objective : str
291
  Alternatively, you can specify the full objective function as
292
  a snippet of Julia code, including any sort of custom evaluation
293
  (including symbolic manipulations beforehand), and any sort
294
- of loss function or regularizations. The default `full_objective`
295
  used in SymbolicRegression.jl is roughly equal to:
296
  ```julia
297
  function eval_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}
@@ -357,7 +354,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
357
  takes a loss and complexity as input, for example:
358
  `"f(loss, complexity) = (loss < 0.1) && (complexity < 10)"`.
359
  Default is `None`.
360
- ncyclesperiteration : int
361
  Number of total mutations to run, per 10 samples of the
362
  population, per iteration.
363
  Default is `550`.
@@ -401,7 +398,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
401
  Constant optimization can also be performed as a mutation, in addition to
402
  the normal strategy controlled by `optimize_probability` which happens
403
  every iteration. Using it as a mutation is useful if you want to use
404
- a large `ncyclesperiteration`, and may not optimize very often.
405
  Default is `0.0`.
406
  crossover_probability : float
407
  Absolute probability of crossover-type genetic operation, instead of a mutation.
@@ -536,11 +533,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
536
  delete_tempfiles : bool
537
  Whether to delete the temporary files after finishing.
538
  Default is `True`.
539
- julia_project : str
540
- A Julia environment location containing a Project.toml
541
- (and potentially the source code for SymbolicRegression.jl).
542
- Default gives the Python package directory, where a
543
- Project.toml file should be present from the install.
544
  update: bool
545
  Whether to automatically update Julia packages when `fit` is called.
546
  You should make sure that PySR is up-to-date itself first, as
@@ -585,11 +577,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
585
  before passing to the symbolic regression code. None means no
586
  feature selection; an int means select that many features.
587
  Default is `None`.
588
- julia_kwargs : dict
589
- Keyword arguments to pass to `julia.core.Julia(...)` to initialize
590
- the Julia runtime. The default, when `None`, is to set `threads` equal
591
- to `procs`, and `optimize` to 3.
592
- Default is `None`.
593
  **kwargs : dict
594
  Supports deprecated keyword arguments. Other arguments will
595
  result in an error.
@@ -617,8 +604,15 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
617
  Path to the temporary equations directory.
618
  equation_file_ : str
619
  Output equation file name produced by the julia backend.
620
- raw_julia_state_ : tuple[list[PyCall.jlwrap], PyCall.jlwrap]
621
- The state for the julia SymbolicRegression.jl backend post fitting.
 
 
 
 
 
 
 
622
  equation_file_contents_ : list[pandas.DataFrame]
623
  Contents of the equation file output by the Julia backend.
624
  show_pickle_warnings_ : bool
@@ -643,7 +637,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
643
  ... "inv(x) = 1/x", # Custom operator (julia syntax)
644
  ... ],
645
  ... model_selection="best",
646
- ... loss="loss(x, y) = (x - y)^2", # Custom loss function (julia syntax)
647
  ... )
648
  >>> model.fit(X, y)
649
  >>> model
@@ -681,8 +675,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
681
  timeout_in_seconds: Optional[float] = None,
682
  constraints: Optional[Dict[str, Union[int, Tuple[int, int]]]] = None,
683
  nested_constraints: Optional[Dict[str, Dict[str, int]]] = None,
684
- loss: Optional[str] = None,
685
- full_objective: Optional[str] = None,
686
  complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
687
  complexity_of_constants: Union[int, float] = 1,
688
  complexity_of_variables: Union[int, float] = 1,
@@ -694,7 +688,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
694
  alpha: float = 0.1,
695
  annealing: bool = False,
696
  early_stop_condition: Optional[Union[float, str]] = None,
697
- ncyclesperiteration: int = 550,
698
  fraction_replaced: float = 0.000364,
699
  fraction_replaced_hof: float = 0.035,
700
  weight_add_node: float = 0.79,
@@ -744,7 +738,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
744
  temp_equation_file: bool = False,
745
  tempdir: Optional[str] = None,
746
  delete_tempfiles: bool = True,
747
- julia_project: Optional[str] = None,
748
  update: bool = False,
749
  output_jax_format: bool = False,
750
  output_torch_format: bool = False,
@@ -753,7 +746,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
753
  extra_jax_mappings: Optional[Dict[Callable, str]] = None,
754
  denoise: bool = False,
755
  select_k_features: Optional[int] = None,
756
- julia_kwargs: Optional[Dict] = None,
757
  **kwargs,
758
  ):
759
  # Hyperparameters
@@ -764,7 +756,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
764
  self.niterations = niterations
765
  self.populations = populations
766
  self.population_size = population_size
767
- self.ncyclesperiteration = ncyclesperiteration
768
  # - Equation Constraints
769
  self.maxsize = maxsize
770
  self.maxdepth = maxdepth
@@ -777,8 +769,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
777
  self.timeout_in_seconds = timeout_in_seconds
778
  self.early_stop_condition = early_stop_condition
779
  # - Loss parameters
780
- self.loss = loss
781
- self.full_objective = full_objective
782
  self.complexity_of_operators = complexity_of_operators
783
  self.complexity_of_constants = complexity_of_constants
784
  self.complexity_of_variables = complexity_of_variables
@@ -844,7 +836,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
844
  self.temp_equation_file = temp_equation_file
845
  self.tempdir = tempdir
846
  self.delete_tempfiles = delete_tempfiles
847
- self.julia_project = julia_project
848
  self.update = update
849
  self.output_jax_format = output_jax_format
850
  self.output_torch_format = output_torch_format
@@ -854,16 +845,14 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
854
  # Pre-modelling transformation
855
  self.denoise = denoise
856
  self.select_k_features = select_k_features
857
- self.julia_kwargs = julia_kwargs
858
 
859
  # Once all valid parameters have been assigned handle the
860
  # deprecated kwargs
861
  if len(kwargs) > 0: # pragma: no cover
862
- deprecated_kwargs = make_deprecated_kwargs_for_pysr_regressor()
863
  for k, v in kwargs.items():
864
  # Handle renamed kwargs
865
- if k in deprecated_kwargs:
866
- updated_kwarg_name = deprecated_kwargs[k]
867
  setattr(self, updated_kwarg_name, v)
868
  warnings.warn(
869
  f"{k} has been renamed to {updated_kwarg_name} in PySRRegressor. "
@@ -877,6 +866,19 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
877
  f"Ignoring parameter; please pass {k} during the call to fit instead.",
878
  FutureWarning,
879
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
880
  else:
881
  raise TypeError(
882
  f"{k} is not a valid keyword argument for PySRRegressor."
@@ -1051,7 +1053,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1051
  serialization.
1052
 
1053
  Thus, for `PySRRegressor` to support pickle serialization, the
1054
- `raw_julia_state_` attribute must be hidden from pickle. This will
1055
  prevent the `warm_start` of any model that is loaded via `pickle.loads()`,
1056
  but does allow all other attributes of a fitted `PySRRegressor` estimator
1057
  to be serialized. Note: Jax and Torch format equations are also removed
@@ -1061,12 +1063,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1061
  show_pickle_warning = not (
1062
  "show_pickle_warnings_" in state and not state["show_pickle_warnings_"]
1063
  )
1064
- if "raw_julia_state_" in state and show_pickle_warning:
1065
- warnings.warn(
1066
- "raw_julia_state_ cannot be pickled and will be removed from the "
1067
- "serialized instance. This will prevent a `warm_start` fit of any "
1068
- "model that is deserialized via `pickle.load()`."
1069
- )
1070
  state_keys_containing_lambdas = ["extra_sympy_mappings", "extra_torch_mappings"]
1071
  for state_key in state_keys_containing_lambdas:
1072
  if state[state_key] is not None and show_pickle_warning:
@@ -1075,7 +1071,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1075
  "serialized instance. When loading the model, please redefine "
1076
  f"`{state_key}` at runtime."
1077
  )
1078
- state_keys_to_clear = ["raw_julia_state_"] + state_keys_containing_lambdas
1079
  pickled_state = {
1080
  key: (None if key in state_keys_to_clear else value)
1081
  for key, value in state.items()
@@ -1125,6 +1121,24 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1125
  )
1126
  return self.equations_
1127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1128
  def get_best(self, index=None):
1129
  """
1130
  Get best equation using `model_selection`.
@@ -1238,8 +1252,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1238
  "to True and `procs` to 0 will result in non-deterministic searches. "
1239
  )
1240
 
1241
- if self.loss is not None and self.full_objective is not None:
1242
- raise ValueError("You cannot set both `loss` and `full_objective`.")
 
 
1243
 
1244
  # NotImplementedError - Values that could be supported at a later time
1245
  if self.optimizer_algorithm not in VALID_OPTIMIZER_ALGORITHMS:
@@ -1291,16 +1307,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1291
  > 0
1292
  )
1293
 
1294
- julia_kwargs = {}
1295
- if self.julia_kwargs is not None:
1296
- for key, value in self.julia_kwargs.items():
1297
- julia_kwargs[key] = value
1298
- if "optimize" not in julia_kwargs:
1299
- julia_kwargs["optimize"] = 3
1300
- if "threads" not in julia_kwargs and packed_modified_params["multithreading"]:
1301
- julia_kwargs["threads"] = self.procs
1302
- packed_modified_params["julia_kwargs"] = julia_kwargs
1303
-
1304
  return packed_modified_params
1305
 
1306
  def _validate_and_set_fit_params(
@@ -1528,7 +1534,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1528
  # Need to be global as we don't want to recreate/reinstate julia for
1529
  # every new instance of PySRRegressor
1530
  global already_ran
1531
- global Main
1532
 
1533
  # These are the parameters which may be modified from the ones
1534
  # specified in init, so we define them here locally:
@@ -1543,32 +1548,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1543
  batch_size = mutated_params["batch_size"]
1544
  update_verbosity = mutated_params["update_verbosity"]
1545
  progress = mutated_params["progress"]
1546
- julia_kwargs = mutated_params["julia_kwargs"]
1547
 
1548
  # Start julia backend processes
1549
  if not already_ran and update_verbosity != 0:
1550
  print("Compiling Julia backend...")
1551
 
1552
- Main = init_julia(self.julia_project, julia_kwargs=julia_kwargs)
1553
-
1554
  if cluster_manager is not None:
1555
- cluster_manager = _load_cluster_manager(Main, cluster_manager)
1556
-
1557
- if self.update:
1558
- _, is_shared = _process_julia_project(self.julia_project)
1559
- io = "devnull" if update_verbosity == 0 else "stderr"
1560
- io_arg = (
1561
- f"io={io}" if is_julia_version_greater_eq(version=(1, 6, 0)) else ""
1562
- )
1563
- _update_julia_project(Main, is_shared, io_arg)
1564
-
1565
- SymbolicRegression = _load_backend(Main)
1566
-
1567
- Main.plus = Main.eval("(+)")
1568
- Main.sub = Main.eval("(-)")
1569
- Main.mult = Main.eval("(*)")
1570
- Main.pow = Main.eval("(^)")
1571
- Main.div = Main.eval("(/)")
1572
 
1573
  # TODO(mcranmer): These functions should be part of this class.
1574
  binary_operators, unary_operators = _maybe_create_inline_operators(
@@ -1594,7 +1580,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1594
  nested_constraints_str += f"({inner_k}) => {inner_v}, "
1595
  nested_constraints_str += "), "
1596
  nested_constraints_str += ")"
1597
- nested_constraints = Main.eval(nested_constraints_str)
1598
 
1599
  # Parse dict into Julia Dict for complexities:
1600
  if complexity_of_operators is not None:
@@ -1602,13 +1588,21 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1602
  for k, v in complexity_of_operators.items():
1603
  complexity_of_operators_str += f"({k}) => {v}, "
1604
  complexity_of_operators_str += ")"
1605
- complexity_of_operators = Main.eval(complexity_of_operators_str)
1606
 
1607
- custom_loss = Main.eval(self.loss)
1608
- custom_full_objective = Main.eval(self.full_objective)
 
 
 
 
 
 
1609
 
1610
- early_stop_condition = Main.eval(
1611
- str(self.early_stop_condition) if self.early_stop_condition else None
 
 
1612
  )
1613
 
1614
  mutation_weights = SymbolicRegression.MutationWeights(
@@ -1627,10 +1621,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1627
  # Call to Julia backend.
1628
  # See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl
1629
  options = SymbolicRegression.Options(
1630
- binary_operators=Main.eval(str(binary_operators).replace("'", "")),
1631
- unary_operators=Main.eval(str(unary_operators).replace("'", "")),
1632
- bin_constraints=bin_constraints,
1633
- una_constraints=una_constraints,
1634
  complexity_of_operators=complexity_of_operators,
1635
  complexity_of_constants=self.complexity_of_constants,
1636
  complexity_of_variables=self.complexity_of_variables,
@@ -1665,7 +1659,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1665
  use_frequency_in_tournament=self.use_frequency_in_tournament,
1666
  adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
1667
  npop=self.population_size,
1668
- ncycles_per_iteration=self.ncyclesperiteration,
1669
  fraction_replaced=self.fraction_replaced,
1670
  topn=self.topn,
1671
  print_precision=self.print_precision,
@@ -1685,6 +1679,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1685
  define_helper_functions=False,
1686
  )
1687
 
 
 
1688
  # Convert data to desired precision
1689
  test_X = np.array(X)
1690
  is_complex = np.issubdtype(test_X.dtype, np.complexfloating)
@@ -1695,18 +1691,18 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1695
  np_dtype = {32: np.complex64, 64: np.complex128}[self.precision]
1696
 
1697
  # This converts the data into a Julia array:
1698
- Main.X = np.array(X, dtype=np_dtype).T
1699
  if len(y.shape) == 1:
1700
- Main.y = np.array(y, dtype=np_dtype)
1701
  else:
1702
- Main.y = np.array(y, dtype=np_dtype).T
1703
  if weights is not None:
1704
  if len(weights.shape) == 1:
1705
- Main.weights = np.array(weights, dtype=np_dtype)
1706
  else:
1707
- Main.weights = np.array(weights, dtype=np_dtype).T
1708
  else:
1709
- Main.weights = None
1710
 
1711
  if self.procs == 0 and not multithreading:
1712
  parallelism = "serial"
@@ -1719,34 +1715,41 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1719
  None if parallelism in ["serial", "multithreading"] else int(self.procs)
1720
  )
1721
 
1722
- y_variable_names = None
1723
  if len(y.shape) > 1:
1724
  # We set these manually so that they respect Python's 0 indexing
1725
  # (by default Julia will use y1, y2...)
1726
- y_variable_names = [f"y{_subscriptify(i)}" for i in range(y.shape[1])]
 
 
 
 
1727
 
1728
- # Call to Julia backend.
1729
- # See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/SymbolicRegression.jl
1730
- self.raw_julia_state_ = SymbolicRegression.equation_search(
1731
- Main.X,
1732
- Main.y,
1733
- weights=Main.weights,
1734
  niterations=int(self.niterations),
1735
- variable_names=self.feature_names_in_.tolist(),
1736
- display_variable_names=self.display_feature_names_in_.tolist(),
1737
- y_variable_names=y_variable_names,
1738
- X_units=self.X_units_,
1739
- y_units=self.y_units_,
 
 
1740
  options=options,
1741
  numprocs=cprocs,
1742
  parallelism=parallelism,
1743
- saved_state=self.raw_julia_state_,
1744
  return_state=True,
1745
  addprocs_function=cluster_manager,
1746
  heap_size_hint_in_bytes=self.heap_size_hint_in_bytes,
1747
  progress=progress and self.verbosity > 0 and len(y.shape) == 1,
1748
  verbosity=int(self.verbosity),
1749
  )
 
 
 
1750
 
1751
  # Set attributes
1752
  self.equations_ = self.get_hof()
@@ -1810,10 +1813,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1810
  Fitted estimator.
1811
  """
1812
  # Init attributes that are not specified in BaseEstimator
1813
- if self.warm_start and hasattr(self, "raw_julia_state_"):
1814
  pass
1815
  else:
1816
- if hasattr(self, "raw_julia_state_"):
1817
  warnings.warn(
1818
  "The discovered expressions are being reset. "
1819
  "Please set `warm_start=True` if you wish to continue "
@@ -1823,7 +1826,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1823
  self.equations_ = None
1824
  self.nout_ = 1
1825
  self.selection_mask_ = None
1826
- self.raw_julia_state_ = None
 
1827
  self.X_units_ = None
1828
  self.y_units_ = None
1829
 
 
25
  from sklearn.utils.validation import _check_feature_names_in, check_is_fitted
26
 
27
  from .denoising import denoise, multi_denoise
28
+ from .deprecated import DEPRECATED_KWARGS
29
  from .export_jax import sympy2jax
30
  from .export_latex import sympy2latex, sympy2latextable, sympy2multilatextable
31
  from .export_numpy import sympy2numpy
 
33
  from .export_torch import sympy2torch
34
  from .feature_selection import run_feature_selection
35
  from .julia_helpers import (
36
+ PythonCall,
37
  _escape_filename,
 
38
  _load_cluster_manager,
39
+ jl_array,
40
+ jl_deserialize,
41
+ jl_serialize,
 
42
  )
43
+ from .julia_import import SymbolicRegression, jl
44
  from .utils import (
45
  _csv_filename_to_pkl_filename,
46
  _preprocess_julia_floats,
 
48
  _subscriptify,
49
  )
50
 
 
 
51
  already_ran = False
52
 
53
 
 
90
  def _maybe_create_inline_operators(
91
  binary_operators, unary_operators, extra_sympy_mappings
92
  ):
 
93
  binary_operators = binary_operators.copy()
94
  unary_operators = unary_operators.copy()
95
  for op_list in [binary_operators, unary_operators]:
 
97
  is_user_defined_operator = "(" in op
98
 
99
  if is_user_defined_operator:
100
+ jl.seval(op)
101
  # Cut off from the first non-alphanumeric char:
102
  first_non_char = [j for j, char in enumerate(op) if char == "("][0]
103
  function_name = op[:first_non_char]
 
268
  arguments are treated the same way, and the max of each
269
  argument is constrained.
270
  Default is `None`.
271
+ elementwise_loss : str
272
  String of Julia code specifying an elementwise loss function.
273
  Can either be a loss from LossFunctions.jl, or your own loss
274
  written as a function. Examples of custom written losses include:
 
284
  `ModifiedHuberLoss()`, `L2MarginLoss()`, `ExpLoss()`,
285
  `SigmoidLoss()`, `DWDMarginLoss(q)`.
286
  Default is `"L2DistLoss()"`.
287
+ loss_function : str
288
  Alternatively, you can specify the full objective function as
289
  a snippet of Julia code, including any sort of custom evaluation
290
  (including symbolic manipulations beforehand), and any sort
291
+ of loss function or regularizations. The default `loss_function`
292
  used in SymbolicRegression.jl is roughly equal to:
293
  ```julia
294
  function eval_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}
 
354
  takes a loss and complexity as input, for example:
355
  `"f(loss, complexity) = (loss < 0.1) && (complexity < 10)"`.
356
  Default is `None`.
357
+ ncycles_per_iteration : int
358
  Number of total mutations to run, per 10 samples of the
359
  population, per iteration.
360
  Default is `550`.
 
398
  Constant optimization can also be performed as a mutation, in addition to
399
  the normal strategy controlled by `optimize_probability` which happens
400
  every iteration. Using it as a mutation is useful if you want to use
401
+ a large `ncycles_per_iteration`, and may not optimize very often.
402
  Default is `0.0`.
403
  crossover_probability : float
404
  Absolute probability of crossover-type genetic operation, instead of a mutation.
 
533
  delete_tempfiles : bool
534
  Whether to delete the temporary files after finishing.
535
  Default is `True`.
 
 
 
 
 
536
  update : bool
537
  Whether to automatically update Julia packages when `fit` is called.
538
  You should make sure that PySR is up-to-date itself first, as
 
577
  before passing to the symbolic regression code. None means no
578
  feature selection; an int means select that many features.
579
  Default is `None`.
 
 
 
 
 
580
  **kwargs : dict
581
  Supports deprecated keyword arguments. Other arguments will
582
  result in an error.
 
604
  Path to the temporary equations directory.
605
  equation_file_ : str
606
  Output equation file name produced by the julia backend.
607
+ julia_state_stream_ : ndarray
608
+ The serialized state for the julia SymbolicRegression.jl backend (after fitting),
609
+ stored as an array of uint8, produced by Julia's Serialization.serialize function.
610
+ julia_state_
611
+ The deserialized state.
612
+ julia_options_stream_ : ndarray
613
+ The serialized julia options, stored as an array of uint8, produced by Julia's Serialization.serialize function.
614
+ julia_options_
615
+ The deserialized julia options.
616
  equation_file_contents_ : list[pandas.DataFrame]
617
  Contents of the equation file output by the Julia backend.
618
  show_pickle_warnings_ : bool
 
637
  ... "inv(x) = 1/x", # Custom operator (julia syntax)
638
  ... ],
639
  ... model_selection="best",
640
+ ... elementwise_loss="loss(x, y) = (x - y)^2", # Custom loss function (julia syntax)
641
  ... )
642
  >>> model.fit(X, y)
643
  >>> model
 
675
  timeout_in_seconds: Optional[float] = None,
676
  constraints: Optional[Dict[str, Union[int, Tuple[int, int]]]] = None,
677
  nested_constraints: Optional[Dict[str, Dict[str, int]]] = None,
678
+ elementwise_loss: Optional[str] = None,
679
+ loss_function: Optional[str] = None,
680
  complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None,
681
  complexity_of_constants: Union[int, float] = 1,
682
  complexity_of_variables: Union[int, float] = 1,
 
688
  alpha: float = 0.1,
689
  annealing: bool = False,
690
  early_stop_condition: Optional[Union[float, str]] = None,
691
+ ncycles_per_iteration: int = 550,
692
  fraction_replaced: float = 0.000364,
693
  fraction_replaced_hof: float = 0.035,
694
  weight_add_node: float = 0.79,
 
738
  temp_equation_file: bool = False,
739
  tempdir: Optional[str] = None,
740
  delete_tempfiles: bool = True,
 
741
  update: bool = False,
742
  output_jax_format: bool = False,
743
  output_torch_format: bool = False,
 
746
  extra_jax_mappings: Optional[Dict[Callable, str]] = None,
747
  denoise: bool = False,
748
  select_k_features: Optional[int] = None,
 
749
  **kwargs,
750
  ):
751
  # Hyperparameters
 
756
  self.niterations = niterations
757
  self.populations = populations
758
  self.population_size = population_size
759
+ self.ncycles_per_iteration = ncycles_per_iteration
760
  # - Equation Constraints
761
  self.maxsize = maxsize
762
  self.maxdepth = maxdepth
 
769
  self.timeout_in_seconds = timeout_in_seconds
770
  self.early_stop_condition = early_stop_condition
771
  # - Loss parameters
772
+ self.elementwise_loss = elementwise_loss
773
+ self.loss_function = loss_function
774
  self.complexity_of_operators = complexity_of_operators
775
  self.complexity_of_constants = complexity_of_constants
776
  self.complexity_of_variables = complexity_of_variables
 
836
  self.temp_equation_file = temp_equation_file
837
  self.tempdir = tempdir
838
  self.delete_tempfiles = delete_tempfiles
 
839
  self.update = update
840
  self.output_jax_format = output_jax_format
841
  self.output_torch_format = output_torch_format
 
845
  # Pre-modelling transformation
846
  self.denoise = denoise
847
  self.select_k_features = select_k_features
 
848
 
849
  # Once all valid parameters have been assigned handle the
850
  # deprecated kwargs
851
  if len(kwargs) > 0: # pragma: no cover
 
852
  for k, v in kwargs.items():
853
  # Handle renamed kwargs
854
+ if k in DEPRECATED_KWARGS:
855
+ updated_kwarg_name = DEPRECATED_KWARGS[k]
856
  setattr(self, updated_kwarg_name, v)
857
  warnings.warn(
858
  f"{k} has been renamed to {updated_kwarg_name} in PySRRegressor. "
 
866
  f"Ignoring parameter; please pass {k} during the call to fit instead.",
867
  FutureWarning,
868
  )
869
+ elif k == "julia_project":
870
+ warnings.warn(
871
+ "The `julia_project` parameter has been deprecated. To use a custom "
872
+ "julia project, please see `https://astroautomata.com/PySR/backend`.",
873
+ FutureWarning,
874
+ )
875
+ elif k == "julia_kwargs":
876
+ warnings.warn(
877
+ "The `julia_kwargs` parameter has been deprecated. To pass custom "
878
+ "keyword arguments to the julia backend, you should use environment variables. "
879
+ "See the Julia documentation for more information.",
880
+ FutureWarning,
881
+ )
882
  else:
883
  raise TypeError(
884
  f"{k} is not a valid keyword argument for PySRRegressor."
 
1053
  serialization.
1054
 
1055
  Thus, for `PySRRegressor` to support pickle serialization, the
1056
+ `julia_state_stream_` attribute must be hidden from pickle. This will
1057
  prevent the `warm_start` of any model that is loaded via `pickle.loads()`,
1058
  but does allow all other attributes of a fitted `PySRRegressor` estimator
1059
  to be serialized. Note: Jax and Torch format equations are also removed
 
1063
  show_pickle_warning = not (
1064
  "show_pickle_warnings_" in state and not state["show_pickle_warnings_"]
1065
  )
 
 
 
 
 
 
1066
  state_keys_containing_lambdas = ["extra_sympy_mappings", "extra_torch_mappings"]
1067
  for state_key in state_keys_containing_lambdas:
1068
  if state[state_key] is not None and show_pickle_warning:
 
1071
  "serialized instance. When loading the model, please redefine "
1072
  f"`{state_key}` at runtime."
1073
  )
1074
+ state_keys_to_clear = state_keys_containing_lambdas
1075
  pickled_state = {
1076
  key: (None if key in state_keys_to_clear else value)
1077
  for key, value in state.items()
 
1121
  )
1122
  return self.equations_
1123
 
1124
+ @property
1125
+ def julia_options_(self):
1126
+ return jl_deserialize(self.julia_options_stream_)
1127
+
1128
+ @property
1129
+ def julia_state_(self):
1130
+ return jl_deserialize(self.julia_state_stream_)
1131
+
1132
+ @property
1133
+ def raw_julia_state_(self):
1134
+ warnings.warn(
1135
+ "PySRRegressor.raw_julia_state_ is now deprecated. "
1136
+ "Please use PySRRegressor.julia_state_ instead, or julia_state_stream_ "
1137
+ "for the raw stream of bytes.",
1138
+ FutureWarning,
1139
+ )
1140
+ return self.julia_state_
1141
+
1142
  def get_best(self, index=None):
1143
  """
1144
  Get best equation using `model_selection`.
 
1252
  "to True and `procs` to 0 will result in non-deterministic searches. "
1253
  )
1254
 
1255
+ if self.elementwise_loss is not None and self.loss_function is not None:
1256
+ raise ValueError(
1257
+ "You cannot set both `elementwise_loss` and `loss_function`."
1258
+ )
1259
 
1260
  # NotImplementedError - Values that could be supported at a later time
1261
  if self.optimizer_algorithm not in VALID_OPTIMIZER_ALGORITHMS:
 
1307
  > 0
1308
  )
1309
 
 
 
 
 
 
 
 
 
 
 
1310
  return packed_modified_params
1311
 
1312
  def _validate_and_set_fit_params(
 
1534
  # Need to be global as we don't want to recreate/reinstate julia for
1535
  # every new instance of PySRRegressor
1536
  global already_ran
 
1537
 
1538
  # These are the parameters which may be modified from the ones
1539
  # specified in init, so we define them here locally:
 
1548
  batch_size = mutated_params["batch_size"]
1549
  update_verbosity = mutated_params["update_verbosity"]
1550
  progress = mutated_params["progress"]
 
1551
 
1552
  # Start julia backend processes
1553
  if not already_ran and update_verbosity != 0:
1554
  print("Compiling Julia backend...")
1555
 
 
 
1556
  if cluster_manager is not None:
1557
+ cluster_manager = _load_cluster_manager(cluster_manager)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1558
 
1559
  # TODO(mcranmer): These functions should be part of this class.
1560
  binary_operators, unary_operators = _maybe_create_inline_operators(
 
1580
  nested_constraints_str += f"({inner_k}) => {inner_v}, "
1581
  nested_constraints_str += "), "
1582
  nested_constraints_str += ")"
1583
+ nested_constraints = jl.seval(nested_constraints_str)
1584
 
1585
  # Parse dict into Julia Dict for complexities:
1586
  if complexity_of_operators is not None:
 
1588
  for k, v in complexity_of_operators.items():
1589
  complexity_of_operators_str += f"({k}) => {v}, "
1590
  complexity_of_operators_str += ")"
1591
+ complexity_of_operators = jl.seval(complexity_of_operators_str)
1592
 
1593
+ custom_loss = jl.seval(
1594
+ str(self.elementwise_loss)
1595
+ if self.elementwise_loss is not None
1596
+ else "nothing"
1597
+ )
1598
+ custom_full_objective = jl.seval(
1599
+ str(self.loss_function) if self.loss_function is not None else "nothing"
1600
+ )
1601
 
1602
+ early_stop_condition = jl.seval(
1603
+ str(self.early_stop_condition)
1604
+ if self.early_stop_condition is not None
1605
+ else "nothing"
1606
  )
1607
 
1608
  mutation_weights = SymbolicRegression.MutationWeights(
 
1621
  # Call to Julia backend.
1622
  # See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl
1623
  options = SymbolicRegression.Options(
1624
+ binary_operators=jl.seval(str(binary_operators).replace("'", "")),
1625
+ unary_operators=jl.seval(str(unary_operators).replace("'", "")),
1626
+ bin_constraints=jl_array(bin_constraints),
1627
+ una_constraints=jl_array(una_constraints),
1628
  complexity_of_operators=complexity_of_operators,
1629
  complexity_of_constants=self.complexity_of_constants,
1630
  complexity_of_variables=self.complexity_of_variables,
 
1659
  use_frequency_in_tournament=self.use_frequency_in_tournament,
1660
  adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
1661
  npop=self.population_size,
1662
+ ncycles_per_iteration=self.ncycles_per_iteration,
1663
  fraction_replaced=self.fraction_replaced,
1664
  topn=self.topn,
1665
  print_precision=self.print_precision,
 
1679
  define_helper_functions=False,
1680
  )
1681
 
1682
+ self.julia_options_stream_ = jl_serialize(options)
1683
+
1684
  # Convert data to desired precision
1685
  test_X = np.array(X)
1686
  is_complex = np.issubdtype(test_X.dtype, np.complexfloating)
 
1691
  np_dtype = {32: np.complex64, 64: np.complex128}[self.precision]
1692
 
1693
  # This converts the data into a Julia array:
1694
+ jl_X = jl_array(np.array(X, dtype=np_dtype).T)
1695
  if len(y.shape) == 1:
1696
+ jl_y = jl_array(np.array(y, dtype=np_dtype))
1697
  else:
1698
+ jl_y = jl_array(np.array(y, dtype=np_dtype).T)
1699
  if weights is not None:
1700
  if len(weights.shape) == 1:
1701
+ jl_weights = jl_array(np.array(weights, dtype=np_dtype))
1702
  else:
1703
+ jl_weights = jl_array(np.array(weights, dtype=np_dtype).T)
1704
  else:
1705
+ jl_weights = None
1706
 
1707
  if self.procs == 0 and not multithreading:
1708
  parallelism = "serial"
 
1715
  None if parallelism in ["serial", "multithreading"] else int(self.procs)
1716
  )
1717
 
 
1718
  if len(y.shape) > 1:
1719
  # We set these manually so that they respect Python's 0 indexing
1720
  # (by default Julia will use y1, y2...)
1721
+ jl_y_variable_names = jl_array(
1722
+ [f"y{_subscriptify(i)}" for i in range(y.shape[1])]
1723
+ )
1724
+ else:
1725
+ jl_y_variable_names = None
1726
 
1727
+ PythonCall.GC.disable()
1728
+ out = SymbolicRegression.equation_search(
1729
+ jl_X,
1730
+ jl_y,
1731
+ weights=jl_weights,
 
1732
  niterations=int(self.niterations),
1733
+ variable_names=jl_array([str(v) for v in self.feature_names_in_]),
1734
+ display_variable_names=jl_array(
1735
+ [str(v) for v in self.display_feature_names_in_]
1736
+ ),
1737
+ y_variable_names=jl_y_variable_names,
1738
+ X_units=jl_array(self.X_units_),
1739
+ y_units=jl_array(self.y_units_),
1740
  options=options,
1741
  numprocs=cprocs,
1742
  parallelism=parallelism,
1743
+ saved_state=self.julia_state_,
1744
  return_state=True,
1745
  addprocs_function=cluster_manager,
1746
  heap_size_hint_in_bytes=self.heap_size_hint_in_bytes,
1747
  progress=progress and self.verbosity > 0 and len(y.shape) == 1,
1748
  verbosity=int(self.verbosity),
1749
  )
1750
+ PythonCall.GC.enable()
1751
+
1752
+ self.julia_state_stream_ = jl_serialize(out)
1753
 
1754
  # Set attributes
1755
  self.equations_ = self.get_hof()
 
1813
  Fitted estimator.
1814
  """
1815
  # Init attributes that are not specified in BaseEstimator
1816
+ if self.warm_start and hasattr(self, "julia_state_stream_"):
1817
  pass
1818
  else:
1819
+ if hasattr(self, "julia_state_stream_"):
1820
  warnings.warn(
1821
  "The discovered expressions are being reset. "
1822
  "Please set `warm_start=True` if you wish to continue "
 
1826
  self.equations_ = None
1827
  self.nout_ = 1
1828
  self.selection_mask_ = None
1829
+ self.julia_state_stream_ = None
1830
+ self.julia_options_stream_ = None
1831
  self.X_units_ = None
1832
  self.y_units_ = None
1833
 
pysr/test/__init__.py CHANGED
@@ -1,7 +1,15 @@
1
  from .test import runtests
2
- from .test_cli import runtests as runtests_cli
3
- from .test_env import runtests as runtests_env
4
  from .test_jax import runtests as runtests_jax
 
5
  from .test_torch import runtests as runtests_torch
6
 
7
- __all__ = ["runtests", "runtests_env", "runtests_jax", "runtests_torch", "runtests_cli"]
 
 
 
 
 
 
 
 
1
  from .test import runtests
2
+ from .test_cli import get_runtests as get_runtests_cli
3
+ from .test_dev import runtests as runtests_dev
4
  from .test_jax import runtests as runtests_jax
5
+ from .test_startup import runtests as runtests_startup
6
  from .test_torch import runtests as runtests_torch
7
 
8
+ __all__ = [
9
+ "runtests",
10
+ "runtests_jax",
11
+ "runtests_torch",
12
+ "get_runtests_cli",
13
+ "runtests_startup",
14
+ "runtests_dev",
15
+ ]
pysr/test/__main__.py CHANGED
@@ -1,43 +1,13 @@
1
  """CLI for running PySR's test suite."""
2
  import argparse
3
- import os
4
 
5
  from . import *
6
 
7
  if __name__ == "__main__":
8
  # Get args:
9
  parser = argparse.ArgumentParser()
10
- parser.usage = "python -m pysr.test [tests...]"
11
  parser.add_argument(
12
  "test",
13
  nargs="*",
14
- help="Test to run. One or more of 'main', 'env', 'jax', 'torch', 'cli'.",
15
  )
16
-
17
- # Parse args:
18
- args = parser.parse_args()
19
- tests = args.test
20
-
21
- if len(tests) == 0:
22
- # Raise help message:
23
- parser.print_help()
24
- raise SystemExit(1)
25
-
26
- # Run tests:
27
- for test in tests:
28
- if test in {"main", "env", "jax", "torch", "cli"}:
29
- cur_dir = os.path.dirname(os.path.abspath(__file__))
30
- print(f"Running test from {cur_dir}")
31
- if test == "main":
32
- runtests()
33
- elif test == "env":
34
- runtests_env()
35
- elif test == "jax":
36
- runtests_jax()
37
- elif test == "torch":
38
- runtests_torch()
39
- elif test == "cli":
40
- runtests_cli()
41
- else:
42
- parser.print_help()
43
- raise SystemExit(1)
 
1
  """CLI for running PySR's test suite."""
2
  import argparse
 
3
 
4
  from . import *
5
 
6
  if __name__ == "__main__":
7
  # Get args:
8
  parser = argparse.ArgumentParser()
 
9
  parser.add_argument(
10
  "test",
11
  nargs="*",
12
+ help="DEPRECATED. Use `python -m pysr test [tests...]` instead.",
13
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pysr/test/generate_dev_juliapkg.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Example call:
2
+ ## python3 generate_dev_juliapkg.py /pysr/pysr/juliapkg.json /srjl
3
+ import json
4
+ import sys
5
+
6
+ juliapkg_json = sys.argv[1]
7
+ path_to_srjl = sys.argv[2]
8
+
9
+ with open(juliapkg_json, "r") as f:
10
+ juliapkg = json.load(f)
11
+
12
+ del juliapkg["packages"]["SymbolicRegression"]["version"]
13
+ juliapkg["packages"]["SymbolicRegression"]["path"] = path_to_srjl
14
+ juliapkg["packages"]["SymbolicRegression"]["dev"] = True
15
+
16
+ with open(juliapkg_json, "w") as f:
17
+ json.dump(juliapkg, f, indent=4)
pysr/test/incremental_install_simulator.dockerfile DELETED
@@ -1,52 +0,0 @@
1
- # This dockerfile simulates a user installation that first
2
- # builds PySR for Python 3.9, and then upgrades to Python 3.10.
3
- # Normally this would cause an error when installing PyCall, so we want to
4
- # ensure that PySR can automatically patch things.
5
- FROM debian:bullseye-slim
6
-
7
- ENV DEBIAN_FRONTEND=noninteractive
8
-
9
- # Install juliaup and pyenv:
10
- RUN apt-get update && apt-get install -y curl git build-essential \
11
- libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev \
12
- libncurses5-dev libncursesw5-dev xz-utils libffi-dev liblzma-dev && \
13
- apt-get clean && \
14
- rm -rf /var/lib/apt/lists/*
15
-
16
- # Install juliaup:
17
- RUN curl -fsSL https://install.julialang.org | sh -s -- -y
18
-
19
- # Install pyenv:
20
- RUN curl -fsSL curl https://pyenv.run | sh && \
21
- echo 'export PATH="/root/.pyenv/bin:$PATH"' >> ~/.bashrc && \
22
- echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc && \
23
- echo 'eval "$(pyenv init -)"' >> ~/.bashrc && \
24
- echo 'eval "$(pyenv virtualenv-init -)"' >> ~/.bashrc
25
-
26
- # Default to using bash -l:
27
- SHELL ["/bin/bash", "-l", "-c"]
28
-
29
- RUN juliaup add 1.8 && juliaup default 1.8
30
- RUN pyenv install 3.9.2 && pyenv global 3.9.2
31
- RUN python3 -m pip install --upgrade pip
32
-
33
- # Get PySR source:
34
- WORKDIR /pysr
35
- ADD ./requirements.txt /pysr/requirements.txt
36
- RUN python3 -m pip install -r /pysr/requirements.txt
37
-
38
- ADD ./setup.py /pysr/setup.py
39
- ADD ./pysr/ /pysr/pysr/
40
-
41
- # First install of PySR:
42
- RUN python3 -m pip install .
43
- RUN python3 -m pysr install
44
-
45
- # Change Python version:
46
- RUN pyenv install 3.10 && pyenv global 3.10 && pyenv uninstall -f 3.9.2
47
- RUN python3 -m pip install --upgrade pip
48
-
49
- # Second install of PySR:
50
- RUN python3 -m pip install .
51
- RUN rm -r ~/.julia/environments/pysr-*
52
- RUN python3 -m pysr install
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pysr/test/nb_sanitize.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [pathnames]
2
+ regex: /[a-zA-Z0-9_\- .\/]+/pysr/sr\.py
3
+ replace: PATH
pysr/test/params.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import inspect
2
+
3
+ from .. import PySRRegressor
4
+
5
+ DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
6
+ DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default
7
+ DEFAULT_POPULATIONS = DEFAULT_PARAMS["populations"].default
8
+ DEFAULT_NCYCLES = DEFAULT_PARAMS["ncycles_per_iteration"].default
pysr/test/test.py CHANGED
@@ -1,4 +1,3 @@
1
- import inspect
2
  import os
3
  import pickle as pkl
4
  import tempfile
@@ -12,16 +11,18 @@ import pandas as pd
12
  import sympy
13
  from sklearn.utils.estimator_checks import check_estimator
14
 
15
- from .. import PySRRegressor, julia_helpers
16
  from ..export_latex import sympy2latex
17
  from ..feature_selection import _handle_feature_selection, run_feature_selection
 
18
  from ..sr import _check_assertions, _process_constraints, idx_model_selection
19
  from ..utils import _csv_filename_to_pkl_filename
20
-
21
- DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
22
- DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default
23
- DEFAULT_POPULATIONS = DEFAULT_PARAMS["populations"].default
24
- DEFAULT_NCYCLES = DEFAULT_PARAMS["ncyclesperiteration"].default
 
25
 
26
 
27
  class TestPipeline(unittest.TestCase):
@@ -80,7 +81,7 @@ class TestPipeline(unittest.TestCase):
80
  multithreading=False,
81
  turbo=True,
82
  early_stop_condition="stop_if(loss, complexity) = loss < 1e-10 && complexity == 1",
83
- full_objective="""
84
  function my_objective(tree::Node{T}, dataset::Dataset{T}, options::Options) where T
85
  prediction, flag = eval_tree_array(tree, dataset.X, options)
86
  !flag && return T(Inf)
@@ -95,22 +96,39 @@ class TestPipeline(unittest.TestCase):
95
  self.assertLessEqual(best_loss, 1e-10)
96
  self.assertGreaterEqual(best_loss, 0.0)
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  def test_high_precision_search_custom_loss(self):
99
  y = 1.23456789 * self.X[:, 0]
100
  model = PySRRegressor(
101
  **self.default_test_kwargs,
102
  early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 3",
103
- loss="my_loss(prediction, target) = (prediction - target)^2",
104
  precision=64,
105
  parsimony=0.01,
106
  warm_start=True,
107
  )
108
  model.fit(self.X, y)
109
- from pysr.sr import Main
110
 
111
  # We should have that the model state is now a Float64 hof:
112
- Main.test_state = model.raw_julia_state_
113
- self.assertTrue(Main.eval("typeof(test_state[2]).parameters[1] == Float64"))
 
 
 
114
 
115
  def test_multioutput_custom_operator_quiet_custom_complexity(self):
116
  y = self.X[:, [0, 1]] ** 2
@@ -199,6 +217,7 @@ class TestPipeline(unittest.TestCase):
199
  **self.default_test_kwargs,
200
  early_stop_condition="(loss, complexity) -> loss <= 1e-4 && complexity <= 6",
201
  )
 
202
  model.fit(X, y)
203
  test_y = model.predict(X)
204
  self.assertTrue(np.issubdtype(test_y.dtype, np.complexfloating))
@@ -224,16 +243,17 @@ class TestPipeline(unittest.TestCase):
224
  # Test if repeated fit works:
225
  regressor.set_params(
226
  niterations=1,
227
- ncyclesperiteration=2,
228
  warm_start=True,
229
  early_stop_condition=None,
230
  )
231
- # Check that the the julia state is saved:
232
- from pysr.sr import Main
233
 
234
  # We should have that the model state is now a Float32 hof:
235
- Main.test_state = regressor.raw_julia_state_
236
- self.assertTrue(Main.eval("typeof(test_state[2]).parameters[1] == Float32"))
 
 
 
237
  # This should exit almost immediately, and use the old equations
238
  regressor.fit(X, y)
239
 
@@ -548,6 +568,17 @@ class TestMiscellaneous(unittest.TestCase):
548
  # The correct value should be set:
549
  self.assertEqual(model.fraction_replaced, 0.2)
550
 
 
 
 
 
 
 
 
 
 
 
 
551
  def test_power_law_warning(self):
552
  """Ensure that a warning is given for a power law operator."""
553
  with self.assertWarns(UserWarning):
@@ -594,23 +625,6 @@ class TestMiscellaneous(unittest.TestCase):
594
  with self.assertRaises(ValueError):
595
  model.fit(X, y)
596
 
597
- def test_changed_options_warning(self):
598
- """Check that a warning is given if Julia options are changed."""
599
- if julia_helpers.julia_kwargs_at_initialization is None:
600
- julia_helpers.init_julia(julia_kwargs={"threads": 2, "optimize": 3})
601
-
602
- cur_init = julia_helpers.julia_kwargs_at_initialization
603
-
604
- threads_to_change = cur_init["threads"] + 1
605
- with warnings.catch_warnings():
606
- warnings.simplefilter("error")
607
- with self.assertRaises(Exception) as context:
608
- julia_helpers.init_julia(
609
- julia_kwargs={"threads": threads_to_change, "optimize": 3}
610
- )
611
- self.assertIn("Julia has already started", str(context.exception))
612
- self.assertIn("threads", str(context.exception))
613
-
614
  def test_extra_sympy_mappings_undefined(self):
615
  """extra_sympy_mappings=None errors for custom operators"""
616
  model = PySRRegressor(unary_operators=["square2(x) = x^2"])
@@ -640,6 +654,50 @@ class TestMiscellaneous(unittest.TestCase):
640
  model.fit(X, y, variable_names=["f{c}"])
641
  self.assertIn("Invalid variable name", str(cm.exception))
642
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
643
  def test_pickle_with_temp_equation_file(self):
644
  """If we have a temporary equation file, unpickle the estimator."""
645
  model = PySRRegressor(
@@ -678,7 +736,7 @@ class TestMiscellaneous(unittest.TestCase):
678
  model = PySRRegressor(
679
  niterations=int(1 + DEFAULT_NITERATIONS / 10),
680
  populations=int(1 + DEFAULT_POPULATIONS / 3),
681
- ncyclesperiteration=int(2 + DEFAULT_NCYCLES / 10),
682
  verbosity=0,
683
  progress=False,
684
  random_state=0,
@@ -715,6 +773,9 @@ class TestMiscellaneous(unittest.TestCase):
715
  def test_param_groupings(self):
716
  """Test that param_groupings are complete"""
717
  param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
 
 
 
718
  # Read the file, discarding lines ending in ":",
719
  # and removing leading "\s*-\s*":
720
  params = []
@@ -1072,10 +1133,8 @@ class TestDimensionalConstraints(unittest.TestCase):
1072
  # TODO: Determine desired behavior if second .fit() call does not have units
1073
 
1074
 
1075
- def runtests():
1076
  """Run all tests in test.py."""
1077
- suite = unittest.TestSuite()
1078
- loader = unittest.TestLoader()
1079
  test_cases = [
1080
  TestPipeline,
1081
  TestBest,
@@ -1084,8 +1143,11 @@ def runtests():
1084
  TestLaTeXTable,
1085
  TestDimensionalConstraints,
1086
  ]
 
 
 
 
1087
  for test_case in test_cases:
1088
- tests = loader.loadTestsFromTestCase(test_case)
1089
- suite.addTests(tests)
1090
  runner = unittest.TextTestRunner()
1091
  return runner.run(suite)
 
 
1
  import os
2
  import pickle as pkl
3
  import tempfile
 
11
  import sympy
12
  from sklearn.utils.estimator_checks import check_estimator
13
 
14
+ from .. import PySRRegressor, install, jl
15
  from ..export_latex import sympy2latex
16
  from ..feature_selection import _handle_feature_selection, run_feature_selection
17
+ from ..julia_helpers import init_julia
18
  from ..sr import _check_assertions, _process_constraints, idx_model_selection
19
  from ..utils import _csv_filename_to_pkl_filename
20
+ from .params import (
21
+ DEFAULT_NCYCLES,
22
+ DEFAULT_NITERATIONS,
23
+ DEFAULT_PARAMS,
24
+ DEFAULT_POPULATIONS,
25
+ )
26
 
27
 
28
  class TestPipeline(unittest.TestCase):
 
81
  multithreading=False,
82
  turbo=True,
83
  early_stop_condition="stop_if(loss, complexity) = loss < 1e-10 && complexity == 1",
84
+ loss_function="""
85
  function my_objective(tree::Node{T}, dataset::Dataset{T}, options::Options) where T
86
  prediction, flag = eval_tree_array(tree, dataset.X, options)
87
  !flag && return T(Inf)
 
96
  self.assertLessEqual(best_loss, 1e-10)
97
  self.assertGreaterEqual(best_loss, 0.0)
98
 
99
+ # Test options stored:
100
+ self.assertEqual(model.julia_options_.turbo, True)
101
+
102
+ def test_multiline_seval(self):
103
+ # The user should be able to run multiple things in a single seval call:
104
+ num = jl.seval(
105
+ """
106
+ function my_new_objective(x)
107
+ x^2
108
+ end
109
+ 1.5
110
+ """
111
+ )
112
+ self.assertEqual(num, 1.5)
113
+
114
  def test_high_precision_search_custom_loss(self):
115
  y = 1.23456789 * self.X[:, 0]
116
  model = PySRRegressor(
117
  **self.default_test_kwargs,
118
  early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 3",
119
+ elementwise_loss="my_loss(prediction, target) = (prediction - target)^2",
120
  precision=64,
121
  parsimony=0.01,
122
  warm_start=True,
123
  )
124
  model.fit(self.X, y)
 
125
 
126
  # We should have that the model state is now a Float64 hof:
127
+ test_state = model.raw_julia_state_
128
+ self.assertTrue(jl.typeof(test_state[1]).parameters[1] == jl.Float64)
129
+
130
+ # Test options stored:
131
+ self.assertEqual(model.julia_options_.turbo, False)
132
 
133
  def test_multioutput_custom_operator_quiet_custom_complexity(self):
134
  y = self.X[:, [0, 1]] ** 2
 
217
  **self.default_test_kwargs,
218
  early_stop_condition="(loss, complexity) -> loss <= 1e-4 && complexity <= 6",
219
  )
220
+ model.niterations = DEFAULT_NITERATIONS * 10
221
  model.fit(X, y)
222
  test_y = model.predict(X)
223
  self.assertTrue(np.issubdtype(test_y.dtype, np.complexfloating))
 
243
  # Test if repeated fit works:
244
  regressor.set_params(
245
  niterations=1,
246
+ ncycles_per_iteration=2,
247
  warm_start=True,
248
  early_stop_condition=None,
249
  )
 
 
250
 
251
  # We should have that the model state is now a Float32 hof:
252
+ test_state = regressor.julia_state_
253
+ self.assertTrue(
254
+ jl.first(jl.typeof(jl.last(test_state)).parameters) == jl.Float32
255
+ )
256
+
257
  # This should exit almost immediately, and use the old equations
258
  regressor.fit(X, y)
259
 
 
568
  # The correct value should be set:
569
  self.assertEqual(model.fraction_replaced, 0.2)
570
 
571
def test_deprecated_functions(self):
    """Check that the deprecated helpers warn and still hand back `jl`."""
    # Calling `install` must emit a FutureWarning.
    with self.assertWarns(FutureWarning):
        install()

    # Calling `init_julia` must emit a FutureWarning as well.
    with self.assertWarns(FutureWarning):
        julia_handle = init_julia()

    # The value returned by `init_julia` must compare equal to the
    # package-level `jl` object.
    self.assertEqual(julia_handle, jl)
581
+
582
  def test_power_law_warning(self):
583
  """Ensure that a warning is given for a power law operator."""
584
  with self.assertWarns(UserWarning):
 
625
  with self.assertRaises(ValueError):
626
  model.fit(X, y)
627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
628
  def test_extra_sympy_mappings_undefined(self):
629
  """extra_sympy_mappings=None errors for custom operators"""
630
  model = PySRRegressor(unary_operators=["square2(x) = x^2"])
 
654
  model.fit(X, y, variable_names=["f{c}"])
655
  self.assertIn("Invalid variable name", str(cm.exception))
656
 
657
def test_bad_kwargs(self):
    """Check that each invalid keyword combination raises its expected error.

    Every entry pairs constructor ``kwargs`` with the exception type that
    must be raised by the time ``fit`` and ``get_best`` have been called.
    Each entry runs in its own ``subTest`` so that a failure names the
    offending kwargs and the remaining entries are still exercised.
    """
    bad_kwargs = [
        dict(
            kwargs=dict(
                elementwise_loss="g(x, y) = 0.0", loss_function="f(*args) = 0.0"
            ),
            error=ValueError,
        ),
        dict(
            kwargs=dict(maxsize=3),
            error=ValueError,
        ),
        dict(
            kwargs=dict(tournament_selection_n=10, population_size=3),
            error=ValueError,
        ),
        dict(
            kwargs=dict(optimizer_algorithm="COBYLA"),
            error=NotImplementedError,
        ),
        dict(
            kwargs=dict(
                constraints={
                    "+": (3, 5),
                }
            ),
            error=NotImplementedError,
        ),
        dict(
            kwargs=dict(binary_operators=["α(x, y) = x - y"]),
            error=ValueError,
        ),
        dict(
            kwargs=dict(model_selection="unknown"),
            error=NotImplementedError,
        ),
    ]
    for opt in bad_kwargs:
        # subTest labels a failure with the kwargs that caused it, replacing
        # the ad-hoc print that only ran when no exception was raised.
        with self.subTest(kwargs=opt["kwargs"]):
            model = PySRRegressor(**opt["kwargs"], niterations=1)
            with self.assertRaises(opt["error"]):
                model.fit([[1]], [1])
                model.get_best()
700
+
701
  def test_pickle_with_temp_equation_file(self):
702
  """If we have a temporary equation file, unpickle the estimator."""
703
  model = PySRRegressor(
 
736
  model = PySRRegressor(
737
  niterations=int(1 + DEFAULT_NITERATIONS / 10),
738
  populations=int(1 + DEFAULT_POPULATIONS / 3),
739
+ ncycles_per_iteration=int(2 + DEFAULT_NCYCLES / 10),
740
  verbosity=0,
741
  progress=False,
742
  random_state=0,
 
773
  def test_param_groupings(self):
774
  """Test that param_groupings are complete"""
775
  param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
776
+ if not param_groupings_file.exists():
777
+ return
778
+
779
  # Read the file, discarding lines ending in ":",
780
  # and removing leading "\s*-\s*":
781
  params = []
 
1133
  # TODO: Determine desired behavior if second .fit() call does not have units
1134
 
1135
 
1136
+ def runtests(just_tests=False):
1137
  """Run all tests in test.py."""
 
 
1138
  test_cases = [
1139
  TestPipeline,
1140
  TestBest,
 
1143
  TestLaTeXTable,
1144
  TestDimensionalConstraints,
1145
  ]
1146
+ if just_tests:
1147
+ return test_cases
1148
+ suite = unittest.TestSuite()
1149
+ loader = unittest.TestLoader()
1150
  for test_case in test_cases:
1151
+ suite.addTests(loader.loadTestsFromTestCase(test_case))
 
1152
  runner = unittest.TextTestRunner()
1153
  return runner.run(suite)
pysr/test/test_cli.py CHANGED
@@ -1,59 +1,83 @@
1
  import unittest
 
2
 
3
  from click import testing as click_testing
4
 
5
- from .._cli.main import pysr
6
-
7
-
8
- class TestCli(unittest.TestCase):
9
- # TODO: Include test for custom project here.
10
- def setUp(self):
11
- self.cli_runner = click_testing.CliRunner()
12
-
13
- def test_help_on_all_commands(self):
14
- expected = "\n".join(
15
- [
16
- "Usage: pysr [OPTIONS] COMMAND [ARGS]...",
17
- "",
18
- "Options:",
19
- " --help Show this message and exit.",
20
- "",
21
- "Commands:",
22
- " install Install Julia dependencies for PySR.",
23
- "",
24
- ]
25
- )
26
- result = self.cli_runner.invoke(pysr, ["--help"])
27
- self.assertEqual(expected, result.output)
28
- self.assertEqual(0, result.exit_code)
29
-
30
- def test_help_on_install(self):
31
- expected = "\n".join(
32
- [
33
- "Usage: pysr install [OPTIONS]",
34
- "",
35
- " Install Julia dependencies for PySR.",
36
- "",
37
- "Options:",
38
- " -p, --project PROJECT_DIRECTORY",
39
- " Install in a specific Julia project (e.g., a",
40
- " local copy of SymbolicRegression.jl).",
41
- " -q, --quiet Disable logging.",
42
- " --precompile Force precompilation of Julia libraries.",
43
- " --no-precompile Disable precompilation.",
44
- " --help Show this message and exit.",
45
- "",
46
- ]
47
- )
48
- result = self.cli_runner.invoke(pysr, ["install", "--help"])
49
- self.assertEqual(expected, result.output)
50
- self.assertEqual(0, result.exit_code)
51
-
52
-
53
- def runtests():
54
- """Run all tests in cliTest.py."""
55
- loader = unittest.TestLoader()
56
- suite = unittest.TestSuite()
57
- suite.addTests(loader.loadTestsFromTestCase(TestCli))
58
- runner = unittest.TextTestRunner()
59
- return runner.run(suite)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import unittest
2
+ from textwrap import dedent
3
 
4
  from click import testing as click_testing
5
 
6
+
7
+ def get_runtests():
8
+ # Lazy load to avoid circular imports.
9
+
10
+ from .._cli.main import pysr
11
+
12
+ class TestCli(unittest.TestCase):
13
+ # TODO: Include test for custom project here.
14
+ def setUp(self):
15
+ self.cli_runner = click_testing.CliRunner()
16
+
17
+ def test_help_on_all_commands(self):
18
+ expected = dedent(
19
+ """
20
+ Usage: pysr [OPTIONS] COMMAND [ARGS]...
21
+
22
+ Options:
23
+ --help Show this message and exit.
24
+
25
+ Commands:
26
+ install DEPRECATED (dependencies are now installed at import).
27
+ test Run parts of the PySR test suite.
28
+ """
29
+ )
30
+ result = self.cli_runner.invoke(pysr, ["--help"])
31
+ self.assertEqual(result.output.strip(), expected.strip())
32
+ self.assertEqual(result.exit_code, 0)
33
+
34
+ def test_help_on_install(self):
35
+ expected = dedent(
36
+ """
37
+ Usage: pysr install [OPTIONS]
38
+
39
+ DEPRECATED (dependencies are now installed at import).
40
+
41
+ Options:
42
+ -p, --project TEXT
43
+ -q, --quiet Disable logging.
44
+ --precompile
45
+ --no-precompile
46
+ --help Show this message and exit.
47
+ """
48
+ )
49
+ result = self.cli_runner.invoke(pysr, ["install", "--help"])
50
+ self.assertEqual(result.output.strip(), expected.strip())
51
+ self.assertEqual(result.exit_code, 0)
52
+
53
+ def test_help_on_test(self):
54
+ expected = dedent(
55
+ """
56
+ Usage: pysr test [OPTIONS] TESTS
57
+
58
+ Run parts of the PySR test suite.
59
+
60
+ Choose from main, jax, torch, cli, dev, and startup. You can give multiple
61
+ tests, separated by commas.
62
+
63
+ Options:
64
+ --help Show this message and exit.
65
+ """
66
+ )
67
+ result = self.cli_runner.invoke(pysr, ["test", "--help"])
68
+ self.assertEqual(result.output.strip(), expected.strip())
69
+ self.assertEqual(result.exit_code, 0)
70
+
71
def runtests(just_tests=False):
    """Run all tests in test_cli.py.

    When ``just_tests`` is true, return the list of test-case classes
    without running them. Otherwise load them into a suite, run it with a
    ``TextTestRunner`` and return the runner's result.
    """
    test_cases = [TestCli]
    if just_tests:
        return test_cases
    suite = unittest.TestSuite()
    load_case = unittest.TestLoader().loadTestsFromTestCase
    for case in test_cases:
        suite.addTests(load_case(case))
    return unittest.TextTestRunner().run(suite)
82
+
83
+ return runtests
pysr/test/test_dev.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import unittest
4
+ from pathlib import Path
5
+
6
+
7
class TestDev(unittest.TestCase):
    """Docker-based check that PySR can run against an edited backend."""

    def test_simple_change_to_backend(self):
        """Test that we can use a development version of SymbolicRegression.jl

        Builds the ``pysr-dev`` image from
        ``pysr/test/test_dev_pysr.dockerfile`` and then runs
        ``SR.__test_function()`` inside a container, expecting ``2.3`` on
        stdout. The Julia and Python versions passed as build arguments are
        read from ``PYSR_TEST_JULIA_VERSION`` and ``PYSR_TEST_PYTHON_VERSION``
        (defaulting to ``1.6`` and ``3.9``).
        """
        julia_version = os.environ.get("PYSR_TEST_JULIA_VERSION", "1.6")
        python_version = os.environ.get("PYSR_TEST_PYTHON_VERSION", "3.9")
        # Both docker commands run from the directory three levels above
        # this file, which is also the build context (".").
        repo_root = Path(__file__).parent.parent.parent

        build_result = subprocess.run(
            [
                "docker",
                "build",
                "-t",
                "pysr-dev",
                "--build-arg",
                f"JLVERSION={julia_version}",
                "--build-arg",
                f"PYVERSION={python_version}",
                "-f",
                "pysr/test/test_dev_pysr.dockerfile",
                ".",
            ],
            env=os.environ,
            cwd=repo_root,
            universal_newlines=True,
        )
        self.assertEqual(
            build_result.returncode,
            0,
            msg="docker build of the pysr-dev image failed",
        )

        test_result = subprocess.run(
            [
                "docker",
                "run",
                "--rm",
                "pysr-dev",
                "python3",
                "-c",
                "from pysr import SymbolicRegression as SR; print(SR.__test_function())",
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=os.environ,
            cwd=repo_root,
        )
        # stderr is captured, so surface it in the failure message; without
        # this a failing container leaves no trace of what went wrong.
        stderr_text = test_result.stderr.decode("utf-8", errors="replace")
        self.assertEqual(test_result.returncode, 0, msg=stderr_text)
        self.assertEqual(
            test_result.stdout.decode("utf-8").strip(), "2.3", msg=stderr_text
        )
48
+
49
+
50
def runtests(just_tests=False):
    """Run the development-backend tests, or only list their test cases.

    When ``just_tests`` is true, return the list of test-case classes
    without running them. Otherwise load them into a suite, run it with a
    ``TextTestRunner`` and return the runner's result.
    """
    test_cases = [TestDev]
    if just_tests:
        return test_cases
    load_case = unittest.TestLoader().loadTestsFromTestCase
    suite = unittest.TestSuite()
    for case in test_cases:
        suite.addTests(load_case(case))
    return unittest.TextTestRunner().run(suite)
pysr/test/test_dev_pysr.dockerfile ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# This dockerfile simulates a user installation that
# tries to manually edit SymbolicRegression.jl and
# use it from PySR.

ARG JLVERSION=1.9.4
ARG PYVERSION=3.11.6
ARG BASE_IMAGE=bullseye

FROM julia:${JLVERSION}-${BASE_IMAGE} AS jl
FROM python:${PYVERSION}-${BASE_IMAGE}

# Merge Julia image:
COPY --from=jl /usr/local/julia /usr/local/julia
ENV PATH="/usr/local/julia/bin:${PATH}"

WORKDIR /pysr

# Caches install (https://stackoverflow.com/questions/25305788/how-to-avoid-reinstalling-packages-when-building-docker-image-for-python-project)
# COPY is used for all local files below: none of them are archives or URLs,
# so ADD's extra behaviors are not needed.
COPY ./requirements.txt /pysr/requirements.txt
RUN pip3 install --no-cache-dir -r /pysr/requirements.txt

# Install PySR:
# We do a minimal copy so it doesn't need to rerun at every file change:
COPY ./pyproject.toml /pysr/pyproject.toml
COPY ./setup.py /pysr/setup.py

RUN mkdir /pysr/pysr
COPY ./pysr/*.py /pysr/pysr/
COPY ./pysr/juliapkg.json /pysr/pysr/juliapkg.json

RUN mkdir /pysr/pysr/_cli
COPY ./pysr/_cli/*.py /pysr/pysr/_cli/

RUN mkdir /pysr/pysr/test

RUN pip3 install --no-cache-dir .

# Now, we create a custom version of SymbolicRegression.jl
# First, we get the version from juliapkg.json:
RUN python3 -c 'import json; print(json.load(open("/pysr/pysr/juliapkg.json", "r"))["packages"]["SymbolicRegression"]["version"])' > /pysr/sr_version

# Remove any = or ^ or ~ from the version:
RUN sed 's/[\^=~]//g' /pysr/sr_version > /pysr/sr_version_processed

# Now, we check out the version of SymbolicRegression.jl that PySR is using:
RUN git clone -b "v$(cat /pysr/sr_version_processed)" --single-branch https://github.com/MilesCranmer/SymbolicRegression.jl /srjl

# Edit SymbolicRegression.jl to create a new function.
# We want to put this function immediately after `module SymbolicRegression`:
RUN sed -i 's/module SymbolicRegression/module SymbolicRegression\n__test_function() = 2.3/' /srjl/src/SymbolicRegression.jl

# Edit PySR to use the custom version of SymbolicRegression.jl:
COPY ./pysr/test/generate_dev_juliapkg.py /generate_dev_juliapkg.py
RUN python3 /generate_dev_juliapkg.py /pysr/pysr/juliapkg.json /srjl

# Precompile
RUN python3 -c 'import pysr'
pysr/test/test_env.py DELETED
@@ -1,58 +0,0 @@
1
- """Contains tests for creating and initializing custom Julia projects."""
2
-
3
- import os
4
- import unittest
5
- from tempfile import TemporaryDirectory
6
-
7
- from .. import julia_helpers
8
-
9
-
10
- class TestJuliaProject(unittest.TestCase):
11
- """Various tests for working with Julia projects."""
12
-
13
- def test_custom_shared_env(self):
14
- """Test that we can use PySR in a custom shared env."""
15
- with TemporaryDirectory() as tmpdir:
16
- # Create a temp depot to store julia packages (and our custom env)
17
- Main = julia_helpers.init_julia()
18
-
19
- # Set up env:
20
- if "JULIA_DEPOT_PATH" not in os.environ:
21
- old_env = None
22
- os.environ["JULIA_DEPOT_PATH"] = tmpdir
23
- else:
24
- old_env = os.environ["JULIA_DEPOT_PATH"]
25
- os.environ[
26
- "JULIA_DEPOT_PATH"
27
- ] = f"{tmpdir}:{os.environ['JULIA_DEPOT_PATH']}"
28
- Main.eval(
29
- f'pushfirst!(DEPOT_PATH, "{julia_helpers._escape_filename(tmpdir)}")'
30
- )
31
- test_env_name = "@pysr_test_env"
32
- julia_helpers.install(julia_project=test_env_name)
33
- Main = julia_helpers.init_julia(julia_project=test_env_name)
34
-
35
- # Try to use env:
36
- Main.eval("using SymbolicRegression")
37
- Main.eval("using Pkg")
38
-
39
- # Assert we actually loaded it:
40
- cur_project_dir = Main.eval("splitdir(dirname(Base.active_project()))[1]")
41
- potential_shared_project_dirs = Main.eval("Pkg.envdir(DEPOT_PATH[1])")
42
- self.assertEqual(cur_project_dir, potential_shared_project_dirs)
43
-
44
- # Clean up:
45
- Main.eval("pop!(DEPOT_PATH)")
46
- if old_env is None:
47
- del os.environ["JULIA_DEPOT_PATH"]
48
- else:
49
- os.environ["JULIA_DEPOT_PATH"] = old_env
50
-
51
-
52
- def runtests():
53
- """Run all tests in test_env.py."""
54
- loader = unittest.TestLoader()
55
- suite = unittest.TestSuite()
56
- suite.addTests(loader.loadTestsFromTestCase(TestJuliaProject))
57
- runner = unittest.TextTestRunner()
58
- return runner.run(suite)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pysr/test/test_jax.py CHANGED
@@ -121,10 +121,14 @@ class TestJAX(unittest.TestCase):
121
  np.testing.assert_almost_equal(y.values, jax_output, decimal=3)
122
 
123
 
124
- def runtests():
125
  """Run all tests in test_jax.py."""
 
 
 
126
  loader = unittest.TestLoader()
127
  suite = unittest.TestSuite()
128
- suite.addTests(loader.loadTestsFromTestCase(TestJAX))
 
129
  runner = unittest.TextTestRunner()
130
  return runner.run(suite)
 
121
  np.testing.assert_almost_equal(y.values, jax_output, decimal=3)
122
 
123
 
124
def runtests(just_tests=False):
    """Run all tests in test_jax.py.

    When ``just_tests`` is true, return the list of test-case classes
    without running them. Otherwise load them into a suite, run it with a
    ``TextTestRunner`` and return the runner's result.
    """
    test_cases = [TestJAX]
    if just_tests:
        return test_cases
    load_case = unittest.TestLoader().loadTestsFromTestCase
    suite = unittest.TestSuite()
    for case in test_cases:
        suite.addTests(load_case(case))
    return unittest.TextTestRunner().run(suite)
pysr/test/test_nb.ipynb ADDED
@@ -0,0 +1,536 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# NBVAL_IGNORE_OUTPUT\n",
10
+ "import numpy as np\n",
11
+ "from pysr import PySRRegressor, jl"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 2,
17
+ "metadata": {},
18
+ "outputs": [
19
+ {
20
+ "name": "stdout",
21
+ "output_type": "stream",
22
+ "text": [
23
+ "3\n"
24
+ ]
25
+ }
26
+ ],
27
+ "source": [
28
+ "%%julia\n",
29
+ "\n",
30
+ "# Automatically activates Julia magic\n",
31
+ "\n",
32
+ "x = 1\n",
33
+ "println(x + 2)"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 3,
39
+ "metadata": {},
40
+ "outputs": [
41
+ {
42
+ "name": "stdout",
43
+ "output_type": "stream",
44
+ "text": [
45
+ "4\n"
46
+ ]
47
+ }
48
+ ],
49
+ "source": [
50
+ "%julia println(x + 3)"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": 4,
56
+ "metadata": {},
57
+ "outputs": [
58
+ {
59
+ "data": {
60
+ "text/html": [
61
+ "<style>#sk-container-id-1 {\n",
62
+ " /* Definition of color scheme common for light and dark mode */\n",
63
+ " --sklearn-color-text: black;\n",
64
+ " --sklearn-color-line: gray;\n",
65
+ " /* Definition of color scheme for unfitted estimators */\n",
66
+ " --sklearn-color-unfitted-level-0: #fff5e6;\n",
67
+ " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
68
+ " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
69
+ " --sklearn-color-unfitted-level-3: chocolate;\n",
70
+ " /* Definition of color scheme for fitted estimators */\n",
71
+ " --sklearn-color-fitted-level-0: #f0f8ff;\n",
72
+ " --sklearn-color-fitted-level-1: #d4ebff;\n",
73
+ " --sklearn-color-fitted-level-2: #b3dbfd;\n",
74
+ " --sklearn-color-fitted-level-3: cornflowerblue;\n",
75
+ "\n",
76
+ " /* Specific color for light theme */\n",
77
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
78
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
79
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
80
+ " --sklearn-color-icon: #696969;\n",
81
+ "\n",
82
+ " @media (prefers-color-scheme: dark) {\n",
83
+ " /* Redefinition of color scheme for dark theme */\n",
84
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
85
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
86
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
87
+ " --sklearn-color-icon: #878787;\n",
88
+ " }\n",
89
+ "}\n",
90
+ "\n",
91
+ "#sk-container-id-1 {\n",
92
+ " color: var(--sklearn-color-text);\n",
93
+ "}\n",
94
+ "\n",
95
+ "#sk-container-id-1 pre {\n",
96
+ " padding: 0;\n",
97
+ "}\n",
98
+ "\n",
99
+ "#sk-container-id-1 input.sk-hidden--visually {\n",
100
+ " border: 0;\n",
101
+ " clip: rect(1px 1px 1px 1px);\n",
102
+ " clip: rect(1px, 1px, 1px, 1px);\n",
103
+ " height: 1px;\n",
104
+ " margin: -1px;\n",
105
+ " overflow: hidden;\n",
106
+ " padding: 0;\n",
107
+ " position: absolute;\n",
108
+ " width: 1px;\n",
109
+ "}\n",
110
+ "\n",
111
+ "#sk-container-id-1 div.sk-dashed-wrapped {\n",
112
+ " border: 1px dashed var(--sklearn-color-line);\n",
113
+ " margin: 0 0.4em 0.5em 0.4em;\n",
114
+ " box-sizing: border-box;\n",
115
+ " padding-bottom: 0.4em;\n",
116
+ " background-color: var(--sklearn-color-background);\n",
117
+ "}\n",
118
+ "\n",
119
+ "#sk-container-id-1 div.sk-container {\n",
120
+ " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
121
+ " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
122
+ " so we also need the `!important` here to be able to override the\n",
123
+ " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
124
+ " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
125
+ " display: inline-block !important;\n",
126
+ " position: relative;\n",
127
+ "}\n",
128
+ "\n",
129
+ "#sk-container-id-1 div.sk-text-repr-fallback {\n",
130
+ " display: none;\n",
131
+ "}\n",
132
+ "\n",
133
+ "div.sk-parallel-item,\n",
134
+ "div.sk-serial,\n",
135
+ "div.sk-item {\n",
136
+ " /* draw centered vertical line to link estimators */\n",
137
+ " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
138
+ " background-size: 2px 100%;\n",
139
+ " background-repeat: no-repeat;\n",
140
+ " background-position: center center;\n",
141
+ "}\n",
142
+ "\n",
143
+ "/* Parallel-specific style estimator block */\n",
144
+ "\n",
145
+ "#sk-container-id-1 div.sk-parallel-item::after {\n",
146
+ " content: \"\";\n",
147
+ " width: 100%;\n",
148
+ " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
149
+ " flex-grow: 1;\n",
150
+ "}\n",
151
+ "\n",
152
+ "#sk-container-id-1 div.sk-parallel {\n",
153
+ " display: flex;\n",
154
+ " align-items: stretch;\n",
155
+ " justify-content: center;\n",
156
+ " background-color: var(--sklearn-color-background);\n",
157
+ " position: relative;\n",
158
+ "}\n",
159
+ "\n",
160
+ "#sk-container-id-1 div.sk-parallel-item {\n",
161
+ " display: flex;\n",
162
+ " flex-direction: column;\n",
163
+ "}\n",
164
+ "\n",
165
+ "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
166
+ " align-self: flex-end;\n",
167
+ " width: 50%;\n",
168
+ "}\n",
169
+ "\n",
170
+ "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
171
+ " align-self: flex-start;\n",
172
+ " width: 50%;\n",
173
+ "}\n",
174
+ "\n",
175
+ "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
176
+ " width: 0;\n",
177
+ "}\n",
178
+ "\n",
179
+ "/* Serial-specific style estimator block */\n",
180
+ "\n",
181
+ "#sk-container-id-1 div.sk-serial {\n",
182
+ " display: flex;\n",
183
+ " flex-direction: column;\n",
184
+ " align-items: center;\n",
185
+ " background-color: var(--sklearn-color-background);\n",
186
+ " padding-right: 1em;\n",
187
+ " padding-left: 1em;\n",
188
+ "}\n",
189
+ "\n",
190
+ "\n",
191
+ "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
192
+ "clickable and can be expanded/collapsed.\n",
193
+ "- Pipeline and ColumnTransformer use this feature and define the default style\n",
194
+ "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
195
+ "*/\n",
196
+ "\n",
197
+ "/* Pipeline and ColumnTransformer style (default) */\n",
198
+ "\n",
199
+ "#sk-container-id-1 div.sk-toggleable {\n",
200
+ " /* Default theme specific background. It is overwritten whether we have a\n",
201
+ " specific estimator or a Pipeline/ColumnTransformer */\n",
202
+ " background-color: var(--sklearn-color-background);\n",
203
+ "}\n",
204
+ "\n",
205
+ "/* Toggleable label */\n",
206
+ "#sk-container-id-1 label.sk-toggleable__label {\n",
207
+ " cursor: pointer;\n",
208
+ " display: block;\n",
209
+ " width: 100%;\n",
210
+ " margin-bottom: 0;\n",
211
+ " padding: 0.5em;\n",
212
+ " box-sizing: border-box;\n",
213
+ " text-align: center;\n",
214
+ "}\n",
215
+ "\n",
216
+ "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
217
+ " /* Arrow on the left of the label */\n",
218
+ " content: \"▸\";\n",
219
+ " float: left;\n",
220
+ " margin-right: 0.25em;\n",
221
+ " color: var(--sklearn-color-icon);\n",
222
+ "}\n",
223
+ "\n",
224
+ "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
225
+ " color: var(--sklearn-color-text);\n",
226
+ "}\n",
227
+ "\n",
228
+ "/* Toggleable content - dropdown */\n",
229
+ "\n",
230
+ "#sk-container-id-1 div.sk-toggleable__content {\n",
231
+ " max-height: 0;\n",
232
+ " max-width: 0;\n",
233
+ " overflow: hidden;\n",
234
+ " text-align: left;\n",
235
+ " /* unfitted */\n",
236
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
237
+ "}\n",
238
+ "\n",
239
+ "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
240
+ " /* fitted */\n",
241
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
242
+ "}\n",
243
+ "\n",
244
+ "#sk-container-id-1 div.sk-toggleable__content pre {\n",
245
+ " margin: 0.2em;\n",
246
+ " border-radius: 0.25em;\n",
247
+ " color: var(--sklearn-color-text);\n",
248
+ " /* unfitted */\n",
249
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
250
+ "}\n",
251
+ "\n",
252
+ "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
253
+ " /* unfitted */\n",
254
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
255
+ "}\n",
256
+ "\n",
257
+ "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
258
+ " /* Expand drop-down */\n",
259
+ " max-height: 200px;\n",
260
+ " max-width: 100%;\n",
261
+ " overflow: auto;\n",
262
+ "}\n",
263
+ "\n",
264
+ "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
265
+ " content: \"▾\";\n",
266
+ "}\n",
267
+ "\n",
268
+ "/* Pipeline/ColumnTransformer-specific style */\n",
269
+ "\n",
270
+ "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
271
+ " color: var(--sklearn-color-text);\n",
272
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
273
+ "}\n",
274
+ "\n",
275
+ "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
276
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
277
+ "}\n",
278
+ "\n",
279
+ "/* Estimator-specific style */\n",
280
+ "\n",
281
+ "/* Colorize estimator box */\n",
282
+ "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
283
+ " /* unfitted */\n",
284
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
285
+ "}\n",
286
+ "\n",
287
+ "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
288
+ " /* fitted */\n",
289
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
290
+ "}\n",
291
+ "\n",
292
+ "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
293
+ "#sk-container-id-1 div.sk-label label {\n",
294
+ " /* The background is the default theme color */\n",
295
+ " color: var(--sklearn-color-text-on-default-background);\n",
296
+ "}\n",
297
+ "\n",
298
+ "/* On hover, darken the color of the background */\n",
299
+ "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
300
+ " color: var(--sklearn-color-text);\n",
301
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
302
+ "}\n",
303
+ "\n",
304
+ "/* Label box, darken color on hover, fitted */\n",
305
+ "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
306
+ " color: var(--sklearn-color-text);\n",
307
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
308
+ "}\n",
309
+ "\n",
310
+ "/* Estimator label */\n",
311
+ "\n",
312
+ "#sk-container-id-1 div.sk-label label {\n",
313
+ " font-family: monospace;\n",
314
+ " font-weight: bold;\n",
315
+ " display: inline-block;\n",
316
+ " line-height: 1.2em;\n",
317
+ "}\n",
318
+ "\n",
319
+ "#sk-container-id-1 div.sk-label-container {\n",
320
+ " text-align: center;\n",
321
+ "}\n",
322
+ "\n",
323
+ "/* Estimator-specific */\n",
324
+ "#sk-container-id-1 div.sk-estimator {\n",
325
+ " font-family: monospace;\n",
326
+ " border: 1px dotted var(--sklearn-color-border-box);\n",
327
+ " border-radius: 0.25em;\n",
328
+ " box-sizing: border-box;\n",
329
+ " margin-bottom: 0.5em;\n",
330
+ " /* unfitted */\n",
331
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
332
+ "}\n",
333
+ "\n",
334
+ "#sk-container-id-1 div.sk-estimator.fitted {\n",
335
+ " /* fitted */\n",
336
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
337
+ "}\n",
338
+ "\n",
339
+ "/* on hover */\n",
340
+ "#sk-container-id-1 div.sk-estimator:hover {\n",
341
+ " /* unfitted */\n",
342
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
343
+ "}\n",
344
+ "\n",
345
+ "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
346
+ " /* fitted */\n",
347
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
348
+ "}\n",
349
+ "\n",
350
+ "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
351
+ "\n",
352
+ "/* Common style for \"i\" and \"?\" */\n",
353
+ "\n",
354
+ ".sk-estimator-doc-link,\n",
355
+ "a:link.sk-estimator-doc-link,\n",
356
+ "a:visited.sk-estimator-doc-link {\n",
357
+ " float: right;\n",
358
+ " font-size: smaller;\n",
359
+ " line-height: 1em;\n",
360
+ " font-family: monospace;\n",
361
+ " background-color: var(--sklearn-color-background);\n",
362
+ " border-radius: 1em;\n",
363
+ " height: 1em;\n",
364
+ " width: 1em;\n",
365
+ " text-decoration: none !important;\n",
366
+ " margin-left: 1ex;\n",
367
+ " /* unfitted */\n",
368
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
369
+ " color: var(--sklearn-color-unfitted-level-1);\n",
370
+ "}\n",
371
+ "\n",
372
+ ".sk-estimator-doc-link.fitted,\n",
373
+ "a:link.sk-estimator-doc-link.fitted,\n",
374
+ "a:visited.sk-estimator-doc-link.fitted {\n",
375
+ " /* fitted */\n",
376
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
377
+ " color: var(--sklearn-color-fitted-level-1);\n",
378
+ "}\n",
379
+ "\n",
380
+ "/* On hover */\n",
381
+ "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
382
+ ".sk-estimator-doc-link:hover,\n",
383
+ "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
384
+ ".sk-estimator-doc-link:hover {\n",
385
+ " /* unfitted */\n",
386
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
387
+ " color: var(--sklearn-color-background);\n",
388
+ " text-decoration: none;\n",
389
+ "}\n",
390
+ "\n",
391
+ "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
392
+ ".sk-estimator-doc-link.fitted:hover,\n",
393
+ "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
394
+ ".sk-estimator-doc-link.fitted:hover {\n",
395
+ " /* fitted */\n",
396
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
397
+ " color: var(--sklearn-color-background);\n",
398
+ " text-decoration: none;\n",
399
+ "}\n",
400
+ "\n",
401
+ "/* Span, style for the box shown on hovering the info icon */\n",
402
+ ".sk-estimator-doc-link span {\n",
403
+ " display: none;\n",
404
+ " z-index: 9999;\n",
405
+ " position: relative;\n",
406
+ " font-weight: normal;\n",
407
+ " right: .2ex;\n",
408
+ " padding: .5ex;\n",
409
+ " margin: .5ex;\n",
410
+ " width: min-content;\n",
411
+ " min-width: 20ex;\n",
412
+ " max-width: 50ex;\n",
413
+ " color: var(--sklearn-color-text);\n",
414
+ " box-shadow: 2pt 2pt 4pt #999;\n",
415
+ " /* unfitted */\n",
416
+ " background: var(--sklearn-color-unfitted-level-0);\n",
417
+ " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
418
+ "}\n",
419
+ "\n",
420
+ ".sk-estimator-doc-link.fitted span {\n",
421
+ " /* fitted */\n",
422
+ " background: var(--sklearn-color-fitted-level-0);\n",
423
+ " border: var(--sklearn-color-fitted-level-3);\n",
424
+ "}\n",
425
+ "\n",
426
+ ".sk-estimator-doc-link:hover span {\n",
427
+ " display: block;\n",
428
+ "}\n",
429
+ "\n",
430
+ "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
431
+ "\n",
432
+ "#sk-container-id-1 a.estimator_doc_link {\n",
433
+ " float: right;\n",
434
+ " font-size: 1rem;\n",
435
+ " line-height: 1em;\n",
436
+ " font-family: monospace;\n",
437
+ " background-color: var(--sklearn-color-background);\n",
438
+ " border-radius: 1rem;\n",
439
+ " height: 1rem;\n",
440
+ " width: 1rem;\n",
441
+ " text-decoration: none;\n",
442
+ " /* unfitted */\n",
443
+ " color: var(--sklearn-color-unfitted-level-1);\n",
444
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
445
+ "}\n",
446
+ "\n",
447
+ "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
448
+ " /* fitted */\n",
449
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
450
+ " color: var(--sklearn-color-fitted-level-1);\n",
451
+ "}\n",
452
+ "\n",
453
+ "/* On hover */\n",
454
+ "#sk-container-id-1 a.estimator_doc_link:hover {\n",
455
+ " /* unfitted */\n",
456
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
457
+ " color: var(--sklearn-color-background);\n",
458
+ " text-decoration: none;\n",
459
+ "}\n",
460
+ "\n",
461
+ "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
462
+ " /* fitted */\n",
463
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
464
+ "}\n",
465
+ "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>PySRRegressor.equations_ = None</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">&nbsp;PySRRegressor<span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>PySRRegressor.equations_ = None</pre></div> </div></div></div></div>"
466
+ ],
467
+ "text/plain": [
468
+ "PySRRegressor.equations_ = None"
469
+ ]
470
+ },
471
+ "execution_count": 4,
472
+ "metadata": {},
473
+ "output_type": "execute_result"
474
+ }
475
+ ],
476
+ "source": [
477
+ "rstate = np.random.RandomState(0)\n",
478
+ "X = np.random.randn(10, 2)\n",
479
+ "y = np.random.randn(10)\n",
480
+ "\n",
481
+ "model = PySRRegressor(deterministic=True, multithreading=False, procs=0, random_state=0, verbosity=0, progress=False)\n",
482
+ "model"
483
+ ]
484
+ },
485
+ {
486
+ "cell_type": "code",
487
+ "execution_count": 5,
488
+ "metadata": {},
489
+ "outputs": [
490
+ {
491
+ "name": "stderr",
492
+ "output_type": "stream",
493
+ "text": [
494
+ "/Users/mcranmer/PermaDocuments/SymbolicRegressionMonorepo/.venv/lib/python3.12/site-packages/pysr/sr.py:1297: UserWarning: Note: it looks like you are running in Jupyter. The progress bar will be turned off.\n",
495
+ " warnings.warn(\n"
496
+ ]
497
+ },
498
+ {
499
+ "data": {
500
+ "text/plain": [
501
+ "pandas.core.frame.DataFrame"
502
+ ]
503
+ },
504
+ "execution_count": 5,
505
+ "metadata": {},
506
+ "output_type": "execute_result"
507
+ }
508
+ ],
509
+ "source": [
510
+ "model.fit(X, y)\n",
511
+ "type(model.equations_)"
512
+ ]
513
+ }
514
+ ],
515
+ "metadata": {
516
+ "kernelspec": {
517
+ "display_name": "Python 3 (ipykernel)",
518
+ "language": "python",
519
+ "name": "python3"
520
+ },
521
+ "language_info": {
522
+ "codemirror_mode": {
523
+ "name": "ipython",
524
+ "version": 3
525
+ },
526
+ "file_extension": ".py",
527
+ "mimetype": "text/x-python",
528
+ "name": "python",
529
+ "nbconvert_exporter": "python",
530
+ "pygments_lexer": "ipython3",
531
+ "version": "3.12.0"
532
+ }
533
+ },
534
+ "nbformat": 4,
535
+ "nbformat_minor": 2
536
+ }
pysr/test/test_startup.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import platform
3
+ import subprocess
4
+ import sys
5
+ import tempfile
6
+ import textwrap
7
+ import unittest
8
+ from pathlib import Path
9
+
10
+ import numpy as np
11
+
12
+ from .. import PySRRegressor
13
+ from ..julia_import import jl_version
14
+ from .params import DEFAULT_NITERATIONS, DEFAULT_POPULATIONS
15
+
16
+
17
class TestStartup(unittest.TestCase):
    """Various tests related to starting up PySR."""

    def setUp(self):
        """Build the keyword arguments and random data shared by the tests."""
        # Run with twice the test-suite defaults (imported from `.params`)
        # for both the number of iterations and the number of populations.
        self.default_test_kwargs = dict(
            progress=False,
            model_selection="accuracy",
            niterations=DEFAULT_NITERATIONS * 2,
            populations=DEFAULT_POPULATIONS * 2,
            temp_equation_file=True,
        )
        self.rstate = np.random.RandomState(0)
        self.X = self.rstate.randn(100, 5)

    @staticmethod
    def _run_python(code):
        """Run `code` with `python -c` in a new process, capturing its output.

        The child uses the same interpreter (`sys.executable`) and inherits
        the current environment. Returns the `subprocess.CompletedProcess`,
        whose `stdout` and `stderr` attributes are bytes.
        """
        return subprocess.run(
            [sys.executable, "-c", code],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=os.environ,
        )

    def test_warm_start_from_file(self):
        """Test that we can warm start in another process."""
        if platform.system() == "Windows":
            self.skipTest("Warm start test incompatible with Windows")

        with tempfile.TemporaryDirectory() as tmpdirname:
            model = PySRRegressor(
                **self.default_test_kwargs,
                unary_operators=["cos"],
            )
            model.warm_start = True
            model.temp_equation_file = False
            model.equation_file = Path(tmpdirname) / "equations.csv"
            model.deterministic = True
            model.multithreading = False
            model.random_state = 0
            model.procs = 0
            model.early_stop_condition = 1e-10

            rstate = np.random.RandomState(0)
            X = rstate.randn(100, 2)
            y = np.cos(X[:, 0]) ** 2
            model.fit(X, y)

            best_loss = model.equations_.iloc[-1]["loss"]

            # Save X and y to a file:
            X_file = Path(tmpdirname) / "X.npy"
            y_file = Path(tmpdirname) / "y.npy"
            np.save(X_file, X)
            np.save(y_file, y)

            # Now, create a new process and warm start from the file.
            # Paths are interpolated with `!r` so that the generated script
            # contains correctly quoted and escaped string literals.
            result = self._run_python(
                textwrap.dedent(
                    f"""
                    from pysr import PySRRegressor
                    import numpy as np

                    X = np.load({str(X_file)!r})
                    y = np.load({str(y_file)!r})

                    print("Loading model from file")
                    model = PySRRegressor.from_file({str(model.equation_file)!r})

                    assert model.julia_state_ is not None

                    # Reset saved equations; should be loaded from state!
                    model.equations_ = None
                    model.equation_file_contents_ = None

                    model.warm_start = True
                    model.niterations = 0
                    model.max_evals = 0
                    model.ncycles_per_iteration = 0

                    model.fit(X, y)

                    best_loss = model.equations_.iloc[-1]["loss"]

                    assert best_loss <= {best_loss}
                    """
                )
            )
            stdout = result.stdout.decode()
            stderr = result.stderr.decode()
            # Surface the child's stderr on failure; otherwise a non-zero
            # exit code gives no hint about what went wrong.
            self.assertEqual(
                result.returncode,
                0,
                msg=f"Warm start subprocess failed with stderr:\n{stderr}",
            )
            self.assertIn("Loading model from file", stdout)
            self.assertIn("Started!", stderr)

    def test_bad_startup_options(self):
        """Check that each bad startup configuration emits its message.

        Every case runs `import pysr` in a fresh interpreter after applying
        the configuration, then looks for the expected text in stderr.
        """
        warning_tests = [
            dict(
                code='import os; os.environ["PYTHON_JULIACALL_HANDLE_SIGNALS"] = "no"; import pysr',
                msg="PYTHON_JULIACALL_HANDLE_SIGNALS environment variable is set",
            ),
            dict(
                code='import os; os.environ["PYTHON_JULIACALL_THREADS"] = "1"; import pysr',
                msg="PYTHON_JULIACALL_THREADS environment variable is set",
            ),
            dict(
                code="import juliacall; import pysr",
                msg="juliacall module already imported.",
            ),
            dict(
                code='import os; os.environ["PYSR_AUTOLOAD_EXTENSIONS"] = "foo"; import pysr',
                msg="PYSR_AUTOLOAD_EXTENSIONS environment variable is set",
            ),
        ]
        for warning_test in warning_tests:
            # subTest reports every failing case (identified by its code)
            # instead of stopping at the first one.
            with self.subTest(code=warning_test["code"]):
                result = self._run_python(warning_test["code"])
                self.assertIn(warning_test["msg"], result.stderr.decode())

    def test_notebook(self):
        """Run `test_nb.ipynb` through pytest's `--nbval` option in a subprocess."""
        if jl_version < (1, 9, 0):
            self.skipTest("Julia version too old")
        if platform.system() == "Windows":
            self.skipTest("Notebook test incompatible with Windows")
        test_dir = Path(__file__).parent
        # Output is deliberately not captured so pytest's report stays visible.
        result = subprocess.run(
            [
                sys.executable,
                "-m",
                "pytest",
                "--nbval",
                str(test_dir / "test_nb.ipynb"),
                "--nbval-sanitize-with",
                str(test_dir / "nb_sanitize.cfg"),
            ],
            env=os.environ,
        )
        self.assertEqual(result.returncode, 0)
153
+
154
+
155
def runtests(just_tests=False):
    """Run all tests in test_startup.py.

    When `just_tests` is true, return the list of test case classes
    without running them; otherwise run them with a text runner and
    return the runner's result.
    """
    test_cases = [TestStartup]
    if just_tests:
        return test_cases

    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    for test_case in test_cases:
        suite.addTests(loader.loadTestsFromTestCase(test_case))
    return unittest.TextTestRunner().run(suite)
pysr/test/test_torch.py CHANGED
@@ -1,4 +1,3 @@
1
- import platform
2
  import unittest
3
 
4
  import numpy as np
@@ -7,42 +6,28 @@ import sympy
7
 
8
  from .. import PySRRegressor, sympy2torch
9
 
10
- # Need to initialize Julia before importing torch...
11
-
12
-
13
- def _import_torch():
14
- if platform.system() == "Darwin":
15
- # Import PyJulia, then Torch
16
- from ..julia_helpers import init_julia
17
-
18
- init_julia()
19
-
20
- import torch
21
- else:
22
- # Import Torch, then PyJulia
23
- # https://github.com/pytorch/pytorch/issues/78829
24
- import torch
25
- return torch
26
-
27
 
28
  class TestTorch(unittest.TestCase):
29
  def setUp(self):
30
  np.random.seed(0)
31
 
 
 
 
 
 
32
  def test_sympy2torch(self):
33
- torch = _import_torch()
34
  x, y, z = sympy.symbols("x y z")
35
  cosx = 1.0 * sympy.cos(x) + y
36
 
37
- X = torch.tensor(np.random.randn(1000, 3))
38
- true = 1.0 * torch.cos(X[:, 0]) + X[:, 1]
39
  torch_module = sympy2torch(cosx, [x, y, z])
40
  self.assertTrue(
41
  np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
42
  )
43
 
44
  def test_pipeline_pandas(self):
45
- torch = _import_torch()
46
  X = pd.DataFrame(np.random.randn(100, 10))
47
  y = np.ones(X.shape[0])
48
  model = PySRRegressor(
@@ -71,13 +56,12 @@ class TestTorch(unittest.TestCase):
71
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
72
 
73
  np.testing.assert_almost_equal(
74
- tformat(torch.tensor(X.values)).detach().numpy(),
75
  np.square(np.cos(X.values[:, 1])), # Selection 1st feature
76
  decimal=3,
77
  )
78
 
79
  def test_pipeline(self):
80
- torch = _import_torch()
81
  X = np.random.randn(100, 10)
82
  y = np.ones(X.shape[0])
83
  model = PySRRegressor(
@@ -106,22 +90,22 @@ class TestTorch(unittest.TestCase):
106
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
107
 
108
  np.testing.assert_almost_equal(
109
- tformat(torch.tensor(X)).detach().numpy(),
110
  np.square(np.cos(X[:, 1])), # 2nd feature
111
  decimal=3,
112
  )
113
 
114
  def test_mod_mapping(self):
115
- torch = _import_torch()
116
  x, y, z = sympy.symbols("x y z")
117
  expression = x**2 + sympy.atanh(sympy.Mod(y + 1, 2) - 1) * 3.2 * z
118
 
119
  module = sympy2torch(expression, [x, y, z])
120
 
121
- X = torch.rand(100, 3).float() * 10
122
 
123
  true_out = (
124
- X[:, 0] ** 2 + torch.atanh(torch.fmod(X[:, 1] + 1, 2) - 1) * 3.2 * X[:, 2]
 
125
  )
126
  torch_out = module(X)
127
 
@@ -130,7 +114,6 @@ class TestTorch(unittest.TestCase):
130
  )
131
 
132
  def test_custom_operator(self):
133
- torch = _import_torch()
134
  X = np.random.randn(100, 3)
135
  y = np.ones(X.shape[0])
136
  model = PySRRegressor(
@@ -156,7 +139,7 @@ class TestTorch(unittest.TestCase):
156
  model.set_params(
157
  equation_file="equation_file_custom_operator.csv",
158
  extra_sympy_mappings={"mycustomoperator": sympy.sin},
159
- extra_torch_mappings={"mycustomoperator": torch.sin},
160
  )
161
  model.refresh(checkpoint_file="equation_file_custom_operator.csv")
162
  self.assertEqual(str(model.sympy()), "sin(x1)")
@@ -165,13 +148,12 @@ class TestTorch(unittest.TestCase):
165
  tformat = model.pytorch()
166
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
167
  np.testing.assert_almost_equal(
168
- tformat(torch.tensor(X)).detach().numpy(),
169
  np.sin(X[:, 1]),
170
  decimal=3,
171
  )
172
 
173
  def test_feature_selection_custom_operators(self):
174
- torch = _import_torch()
175
  rstate = np.random.RandomState(0)
176
  X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
177
  cos_approx = lambda x: 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
@@ -196,16 +178,20 @@ class TestTorch(unittest.TestCase):
196
 
197
  np_output = model.predict(X.values)
198
 
199
- torch_output = torch_module(torch.tensor(X.values)).detach().numpy()
200
 
201
  np.testing.assert_almost_equal(y.values, np_output, decimal=3)
202
  np.testing.assert_almost_equal(y.values, torch_output, decimal=3)
203
 
204
 
205
- def runtests():
206
  """Run all tests in test_torch.py."""
 
 
 
207
  loader = unittest.TestLoader()
208
  suite = unittest.TestSuite()
209
- suite.addTests(loader.loadTestsFromTestCase(TestTorch))
 
210
  runner = unittest.TextTestRunner()
211
  return runner.run(suite)
 
 
1
  import unittest
2
 
3
  import numpy as np
 
6
 
7
  from .. import PySRRegressor, sympy2torch
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  class TestTorch(unittest.TestCase):
11
  def setUp(self):
12
  np.random.seed(0)
13
 
14
+ # Need to import after juliacall:
15
+ import torch
16
+
17
+ self.torch = torch
18
+
19
  def test_sympy2torch(self):
 
20
  x, y, z = sympy.symbols("x y z")
21
  cosx = 1.0 * sympy.cos(x) + y
22
 
23
+ X = self.torch.tensor(np.random.randn(1000, 3))
24
+ true = 1.0 * self.torch.cos(X[:, 0]) + X[:, 1]
25
  torch_module = sympy2torch(cosx, [x, y, z])
26
  self.assertTrue(
27
  np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
28
  )
29
 
30
  def test_pipeline_pandas(self):
 
31
  X = pd.DataFrame(np.random.randn(100, 10))
32
  y = np.ones(X.shape[0])
33
  model = PySRRegressor(
 
56
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
57
 
58
  np.testing.assert_almost_equal(
59
+ tformat(self.torch.tensor(X.values)).detach().numpy(),
60
  np.square(np.cos(X.values[:, 1])), # Selection 1st feature
61
  decimal=3,
62
  )
63
 
64
  def test_pipeline(self):
 
65
  X = np.random.randn(100, 10)
66
  y = np.ones(X.shape[0])
67
  model = PySRRegressor(
 
90
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
91
 
92
  np.testing.assert_almost_equal(
93
+ tformat(self.torch.tensor(X)).detach().numpy(),
94
  np.square(np.cos(X[:, 1])), # 2nd feature
95
  decimal=3,
96
  )
97
 
98
  def test_mod_mapping(self):
 
99
  x, y, z = sympy.symbols("x y z")
100
  expression = x**2 + sympy.atanh(sympy.Mod(y + 1, 2) - 1) * 3.2 * z
101
 
102
  module = sympy2torch(expression, [x, y, z])
103
 
104
+ X = self.torch.rand(100, 3).float() * 10
105
 
106
  true_out = (
107
+ X[:, 0] ** 2
108
+ + self.torch.atanh(self.torch.fmod(X[:, 1] + 1, 2) - 1) * 3.2 * X[:, 2]
109
  )
110
  torch_out = module(X)
111
 
 
114
  )
115
 
116
  def test_custom_operator(self):
 
117
  X = np.random.randn(100, 3)
118
  y = np.ones(X.shape[0])
119
  model = PySRRegressor(
 
139
  model.set_params(
140
  equation_file="equation_file_custom_operator.csv",
141
  extra_sympy_mappings={"mycustomoperator": sympy.sin},
142
+ extra_torch_mappings={"mycustomoperator": self.torch.sin},
143
  )
144
  model.refresh(checkpoint_file="equation_file_custom_operator.csv")
145
  self.assertEqual(str(model.sympy()), "sin(x1)")
 
148
  tformat = model.pytorch()
149
  self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
150
  np.testing.assert_almost_equal(
151
+ tformat(self.torch.tensor(X)).detach().numpy(),
152
  np.sin(X[:, 1]),
153
  decimal=3,
154
  )
155
 
156
  def test_feature_selection_custom_operators(self):
 
157
  rstate = np.random.RandomState(0)
158
  X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
159
  cos_approx = lambda x: 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
 
178
 
179
  np_output = model.predict(X.values)
180
 
181
+ torch_output = torch_module(self.torch.tensor(X.values)).detach().numpy()
182
 
183
  np.testing.assert_almost_equal(y.values, np_output, decimal=3)
184
  np.testing.assert_almost_equal(y.values, torch_output, decimal=3)
185
 
186
 
187
+ def runtests(just_tests=False):
188
  """Run all tests in test_torch.py."""
189
+ tests = [TestTorch]
190
+ if just_tests:
191
+ return tests
192
  loader = unittest.TestLoader()
193
  suite = unittest.TestSuite()
194
+ for test in tests:
195
+ suite.addTests(loader.loadTestsFromTestCase(test))
196
  runner = unittest.TextTestRunner()
197
  return runner.run(suite)
pysr/version.py DELETED
@@ -1,2 +0,0 @@
1
- __version__ = "0.16.9"
2
- __symbolic_regression_jl_version__ = "0.23.1"