MilesCranmer committed on
Commit
59cf3d0
1 Parent(s): 378fe0d

Fix printed score; demonstrate best() function

Browse files
Files changed (5) hide show
  1. README.md +16 -11
  2. TODO.md +1 -0
  3. docs/start.md +16 -11
  4. pysr/__init__.py +1 -1
  5. pysr/sr.py +2 -2
README.md CHANGED
@@ -65,7 +65,7 @@ pip install pysr
65
 
66
  ```python
67
  import numpy as np
68
- from pysr import pysr
69
 
70
  # Dataset
71
  X = 2*np.random.randn(100, 5)
@@ -73,25 +73,30 @@ y = 2*np.cos(X[:, 3]) + X[:, 0]**2 - 2
73
 
74
  # Learn equations
75
  equations = pysr(X, y, niterations=5,
76
- binary_operators=["plus", "mult"],
77
- unary_operators=["cos", "exp", "sin"])
78
 
79
- ...
80
 
81
- print(equations)
82
  ```
83
 
84
  which gives:
85
 
86
- ```
87
- Complexity MSE Equation
88
- 0 5 1.947431 plus(-1.7420927, mult(x0, x0))
89
- 1 8 0.486858 plus(-1.8710494, plus(cos(x3), mult(x0, x0)))
90
- 2 11 0.000000 plus(plus(mult(x0, x0), cos(x3)), plus(-2.0, cos(x3)))
91
  ```
92
 
93
- The newest version of PySR also returns three additional columns:
 
 
 
 
 
 
 
94
 
 
95
  - `score` - a metric akin to Occam's razor; you should use this to help select the "true" equation.
96
  - `sympy_format` - sympy equation.
97
  - `lambda_format` - a lambda function for that equation, that you can pass values through.
 
65
 
66
  ```python
67
  import numpy as np
68
+ from pysr import pysr, best, get_hof
69
 
70
  # Dataset
71
  X = 2*np.random.randn(100, 5)
 
73
 
74
  # Learn equations
75
  equations = pysr(X, y, niterations=5,
76
+ binary_operators=["plus", "mult"],
77
+ unary_operators=["cos", "exp", "sin"])
78
 
79
+ ...# (you can use ctl-c to exit early)
80
 
81
+ print(best())
82
  ```
83
 
84
  which gives:
85
 
86
+ ```python
87
+ x0**2 + 2.000016*cos(x3) - 1.9999845
 
 
 
88
  ```
89
 
90
+ One can also use `best_tex` to get the LaTeX form,
91
+ or `best_callable` to get a function you can call.
92
+ This uses a score which balances complexity and error;
93
+ however, one can see the full list of equations with:
94
+ ```python
95
+ print(get_hof())
96
+ ```
97
+ This is a pandas table, with additional columns:
98
 
99
+ - `MSE` - the mean square error of the formula
100
  - `score` - a metric akin to Occam's razor; you should use this to help select the "true" equation.
101
  - `sympy_format` - sympy equation.
102
  - `lambda_format` - a lambda function for that equation, that you can pass values through.
TODO.md CHANGED
@@ -58,6 +58,7 @@
58
  ## Feature ideas
59
 
60
  - [ ] Create backup csv file so always something to copy from for `PySR`. Also use random hall of fame file by default. Call function to read from csv after running, so dont need to run again. Dump scores alongside MSE to .csv (and return with Pandas).
 
61
  - [ ] Cross-validation
62
  - [ ] Sympy printing
63
  - [ ] Better cleanup of zombie processes after <ctl-c>
 
58
  ## Feature ideas
59
 
60
  - [ ] Create backup csv file so always something to copy from for `PySR`. Also use random hall of fame file by default. Call function to read from csv after running, so dont need to run again. Dump scores alongside MSE to .csv (and return with Pandas).
61
+ - [ ] Do printing from Python side. Then we can do simplification and pretty-printing.
62
  - [ ] Cross-validation
63
  - [ ] Sympy printing
64
  - [ ] Better cleanup of zombie processes after <ctl-c>
docs/start.md CHANGED
@@ -26,7 +26,7 @@ pip install pysr
26
 
27
  ```python
28
  import numpy as np
29
- from pysr import pysr
30
 
31
  # Dataset
32
  X = 2*np.random.randn(100, 5)
@@ -34,25 +34,30 @@ y = 2*np.cos(X[:, 3]) + X[:, 0]**2 - 2
34
 
35
  # Learn equations
36
  equations = pysr(X, y, niterations=5,
37
- binary_operators=["plus", "mult"],
38
- unary_operators=["cos", "exp", "sin"])
39
 
40
- ...
41
 
42
- print(equations)
43
  ```
44
 
45
  which gives:
46
 
47
- ```
48
- Complexity MSE Equation
49
- 0 5 1.947431 plus(-1.7420927, mult(x0, x0))
50
- 1 8 0.486858 plus(-1.8710494, plus(cos(x3), mult(x0, x0)))
51
- 2 11 0.000000 plus(plus(mult(x0, x0), cos(x3)), plus(-2.0, cos(x3)))
52
  ```
53
 
54
- The newest version of PySR also returns three additional columns:
 
 
 
 
 
 
 
55
 
 
56
  - `score` - a metric akin to Occam's razor; you should use this to help select the "true" equation.
57
  - `sympy_format` - sympy equation.
58
  - `lambda_format` - a lambda function for that equation, that you can pass values through.
 
26
 
27
  ```python
28
  import numpy as np
29
+ from pysr import pysr, best, get_hof
30
 
31
  # Dataset
32
  X = 2*np.random.randn(100, 5)
 
34
 
35
  # Learn equations
36
  equations = pysr(X, y, niterations=5,
37
+ binary_operators=["plus", "mult"],
38
+ unary_operators=["cos", "exp", "sin"])
39
 
40
+ ...# (you can use ctl-c to exit early)
41
 
42
+ print(best())
43
  ```
44
 
45
  which gives:
46
 
47
+ ```python
48
+ x0**2 + 2.000016*cos(x3) - 1.9999845
 
 
 
49
  ```
50
 
51
+ One can also use `best_tex` to get the LaTeX form,
52
+ or `best_callable` to get a function you can call.
53
+ This uses a score which balances complexity and error;
54
+ however, one can see the full list of equations with:
55
+ ```python
56
+ print(get_hof())
57
+ ```
58
+ This is a pandas table, with additional columns:
59
 
60
+ - `MSE` - the mean square error of the formula
61
  - `score` - a metric akin to Occam's razor; you should use this to help select the "true" equation.
62
  - `sympy_format` - sympy equation.
63
  - `lambda_format` - a lambda function for that equation, that you can pass values through.
pysr/__init__.py CHANGED
@@ -1 +1 @@
1
- from .sr import pysr, get_hof, best, best_tex, best_function
 
1
+ from .sr import pysr, get_hof, best, best_tex, best_callable, best_row
pysr/sr.py CHANGED
@@ -391,7 +391,7 @@ def get_hof(equation_file=None, n_features=None, variable_names=None, extra_symp
391
  if lastMSE is None:
392
  cur_score = 0.0
393
  else:
394
- cur_score = np.log(curMSE/lastMSE)/(curComplexity - lastComplexity)
395
 
396
  scores.append(cur_score)
397
  lastMSE = curMSE
@@ -427,7 +427,7 @@ def best_tex(equations=None):
427
  best_sympy = best_row(equations)['sympy_format']
428
  return sympy.latex(best_sympy.simplify())
429
 
430
- def best_function(equations=None):
431
  """Return the equation with the best score, in callable format"""
432
  if equations is None: equations = get_hof()
433
  return best_row(equations)['lambda_format']
 
391
  if lastMSE is None:
392
  cur_score = 0.0
393
  else:
394
+ cur_score = - np.log(curMSE/lastMSE)/(curComplexity - lastComplexity)
395
 
396
  scores.append(cur_score)
397
  lastMSE = curMSE
 
427
  best_sympy = best_row(equations)['sympy_format']
428
  return sympy.latex(best_sympy.simplify())
429
 
430
+ def best_callable(equations=None):
431
  """Return the equation with the best score, in callable format"""
432
  if equations is None: equations = get_hof()
433
  return best_row(equations)['lambda_format']