Spaces:
Running
Running
MilesCranmer
committed on
Commit
·
59cf3d0
1
Parent(s):
378fe0d
Fix printed score; demonstrate best() function
Browse files- README.md +16 -11
- TODO.md +1 -0
- docs/start.md +16 -11
- pysr/__init__.py +1 -1
- pysr/sr.py +2 -2
README.md
CHANGED
@@ -65,7 +65,7 @@ pip install pysr
|
|
65 |
|
66 |
```python
|
67 |
import numpy as np
|
68 |
-
from pysr import pysr
|
69 |
|
70 |
# Dataset
|
71 |
X = 2*np.random.randn(100, 5)
|
@@ -73,25 +73,30 @@ y = 2*np.cos(X[:, 3]) + X[:, 0]**2 - 2
|
|
73 |
|
74 |
# Learn equations
|
75 |
equations = pysr(X, y, niterations=5,
|
76 |
-
|
77 |
-
|
78 |
|
79 |
-
|
80 |
|
81 |
-
print(
|
82 |
```
|
83 |
|
84 |
which gives:
|
85 |
|
86 |
-
```
|
87 |
-
|
88 |
-
0 5 1.947431 plus(-1.7420927, mult(x0, x0))
|
89 |
-
1 8 0.486858 plus(-1.8710494, plus(cos(x3), mult(x0, x0)))
|
90 |
-
2 11 0.000000 plus(plus(mult(x0, x0), cos(x3)), plus(-2.0, cos(x3)))
|
91 |
```
|
92 |
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
|
|
95 |
- `score` - a metric akin to Occam's razor; you should use this to help select the "true" equation.
|
96 |
- `sympy_format` - sympy equation.
|
97 |
- `lambda_format` - a lambda function for that equation, that you can pass values through.
|
|
|
65 |
|
66 |
```python
|
67 |
import numpy as np
|
68 |
+
from pysr import pysr, best, get_hof
|
69 |
|
70 |
# Dataset
|
71 |
X = 2*np.random.randn(100, 5)
|
|
|
73 |
|
74 |
# Learn equations
|
75 |
equations = pysr(X, y, niterations=5,
|
76 |
+
binary_operators=["plus", "mult"],
|
77 |
+
unary_operators=["cos", "exp", "sin"])
|
78 |
|
79 |
+
... # (you can use Ctrl-C to exit early)
|
80 |
|
81 |
+
print(best())
|
82 |
```
|
83 |
|
84 |
which gives:
|
85 |
|
86 |
+
```python
|
87 |
+
x0**2 + 2.000016*cos(x3) - 1.9999845
|
|
|
|
|
|
|
88 |
```
|
89 |
|
90 |
+
One can also use `best_tex` to get the LaTeX form,
|
91 |
+
or `best_callable` to get a function you can call.
|
92 |
+
This uses a score which balances complexity and error;
|
93 |
+
however, one can see the full list of equations with:
|
94 |
+
```python
|
95 |
+
print(get_hof())
|
96 |
+
```
|
97 |
+
This is a pandas table, with additional columns:
|
98 |
|
99 |
+
- `MSE` - the mean square error of the formula
|
100 |
- `score` - a metric akin to Occam's razor; you should use this to help select the "true" equation.
|
101 |
- `sympy_format` - sympy equation.
|
102 |
- `lambda_format` - a lambda function for that equation, that you can pass values through.
|
TODO.md
CHANGED
@@ -58,6 +58,7 @@
|
|
58 |
## Feature ideas
|
59 |
|
60 |
- [ ] Create backup csv file so always something to copy from for `PySR`. Also use random hall of fame file by default. Call function to read from csv after running, so don't need to run again. Dump scores alongside MSE to .csv (and return with Pandas).
|
|
|
61 |
- [ ] Cross-validation
|
62 |
- [ ] Sympy printing
|
63 |
- [ ] Better cleanup of zombie processes after <ctl-c>
|
|
|
58 |
## Feature ideas
|
59 |
|
60 |
- [ ] Create backup csv file so always something to copy from for `PySR`. Also use random hall of fame file by default. Call function to read from csv after running, so don't need to run again. Dump scores alongside MSE to .csv (and return with Pandas).
|
61 |
+
- [ ] Do printing from Python side. Then we can do simplification and pretty-printing.
|
62 |
- [ ] Cross-validation
|
63 |
- [ ] Sympy printing
|
64 |
- [ ] Better cleanup of zombie processes after <ctl-c>
|
docs/start.md
CHANGED
@@ -26,7 +26,7 @@ pip install pysr
|
|
26 |
|
27 |
```python
|
28 |
import numpy as np
|
29 |
-
from pysr import pysr
|
30 |
|
31 |
# Dataset
|
32 |
X = 2*np.random.randn(100, 5)
|
@@ -34,25 +34,30 @@ y = 2*np.cos(X[:, 3]) + X[:, 0]**2 - 2
|
|
34 |
|
35 |
# Learn equations
|
36 |
equations = pysr(X, y, niterations=5,
|
37 |
-
|
38 |
-
|
39 |
|
40 |
-
|
41 |
|
42 |
-
print(
|
43 |
```
|
44 |
|
45 |
which gives:
|
46 |
|
47 |
-
```
|
48 |
-
|
49 |
-
0 5 1.947431 plus(-1.7420927, mult(x0, x0))
|
50 |
-
1 8 0.486858 plus(-1.8710494, plus(cos(x3), mult(x0, x0)))
|
51 |
-
2 11 0.000000 plus(plus(mult(x0, x0), cos(x3)), plus(-2.0, cos(x3)))
|
52 |
```
|
53 |
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
|
|
56 |
- `score` - a metric akin to Occam's razor; you should use this to help select the "true" equation.
|
57 |
- `sympy_format` - sympy equation.
|
58 |
- `lambda_format` - a lambda function for that equation, that you can pass values through.
|
|
|
26 |
|
27 |
```python
|
28 |
import numpy as np
|
29 |
+
from pysr import pysr, best, get_hof
|
30 |
|
31 |
# Dataset
|
32 |
X = 2*np.random.randn(100, 5)
|
|
|
34 |
|
35 |
# Learn equations
|
36 |
equations = pysr(X, y, niterations=5,
|
37 |
+
binary_operators=["plus", "mult"],
|
38 |
+
unary_operators=["cos", "exp", "sin"])
|
39 |
|
40 |
+
... # (you can use Ctrl-C to exit early)
|
41 |
|
42 |
+
print(best())
|
43 |
```
|
44 |
|
45 |
which gives:
|
46 |
|
47 |
+
```python
|
48 |
+
x0**2 + 2.000016*cos(x3) - 1.9999845
|
|
|
|
|
|
|
49 |
```
|
50 |
|
51 |
+
One can also use `best_tex` to get the LaTeX form,
|
52 |
+
or `best_callable` to get a function you can call.
|
53 |
+
This uses a score which balances complexity and error;
|
54 |
+
however, one can see the full list of equations with:
|
55 |
+
```python
|
56 |
+
print(get_hof())
|
57 |
+
```
|
58 |
+
This is a pandas table, with additional columns:
|
59 |
|
60 |
+
- `MSE` - the mean square error of the formula
|
61 |
- `score` - a metric akin to Occam's razor; you should use this to help select the "true" equation.
|
62 |
- `sympy_format` - sympy equation.
|
63 |
- `lambda_format` - a lambda function for that equation, that you can pass values through.
|
pysr/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1 |
-
from .sr import pysr, get_hof, best, best_tex,
|
|
|
1 |
+
from .sr import pysr, get_hof, best, best_tex, best_callable, best_row
|
pysr/sr.py
CHANGED
@@ -391,7 +391,7 @@ def get_hof(equation_file=None, n_features=None, variable_names=None, extra_symp
|
|
391 |
if lastMSE is None:
|
392 |
cur_score = 0.0
|
393 |
else:
|
394 |
-
cur_score = np.log(curMSE/lastMSE)/(curComplexity - lastComplexity)
|
395 |
|
396 |
scores.append(cur_score)
|
397 |
lastMSE = curMSE
|
@@ -427,7 +427,7 @@ def best_tex(equations=None):
|
|
427 |
best_sympy = best_row(equations)['sympy_format']
|
428 |
return sympy.latex(best_sympy.simplify())
|
429 |
|
430 |
-
def
|
431 |
"""Return the equation with the best score, in callable format"""
|
432 |
if equations is None: equations = get_hof()
|
433 |
return best_row(equations)['lambda_format']
|
|
|
391 |
if lastMSE is None:
|
392 |
cur_score = 0.0
|
393 |
else:
|
394 |
+
cur_score = - np.log(curMSE/lastMSE)/(curComplexity - lastComplexity)
|
395 |
|
396 |
scores.append(cur_score)
|
397 |
lastMSE = curMSE
|
|
|
427 |
best_sympy = best_row(equations)['sympy_format']
|
428 |
return sympy.latex(best_sympy.simplify())
|
429 |
|
430 |
+
def best_callable(equations=None):
|
431 |
"""Return the equation with the best score, in callable format"""
|
432 |
if equations is None: equations = get_hof()
|
433 |
return best_row(equations)['lambda_format']
|