MilesCranmer committed on
Commit
f784534
2 Parent(s): 8f3db29 b935024

Merge pull request #26 from MilesCranmer/separate-packages

Browse files
.travis.yml CHANGED
@@ -19,7 +19,7 @@ jobs:
19
  install: pip3 install --upgrade pip
20
 
21
  before_script:
22
- - julia --color=yes -e 'using Pkg; pkg"add Optim; add SpecialFunctions; precompile;"'
23
 
24
  script:
25
  - pip3 install numpy pandas
 
19
  install: pip3 install --upgrade pip
20
 
21
  before_script:
22
+ - julia --color=yes -e 'using Pkg; pkg"add SymbolicRegression; precompile;"'
23
 
24
  script:
25
  - pip3 install numpy pandas
Project.toml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [deps]
2
+ SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
3
+
4
+ [compat]
5
+ SymbolicRegression = "0.4"
6
+ julia = "1"
README.md CHANGED
@@ -14,7 +14,7 @@ Uses regularized evolution, simulated annealing, and gradient-free optimization.
14
  [Documentation](https://pysr.readthedocs.io/en/latest)
15
 
16
  Check out [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl) for
17
- the pure-Julia version of this package.
18
 
19
  Symbolic regression is a very interpretable machine learning algorithm
20
  for low-dimensional problems: these tools search equation space
@@ -51,22 +51,15 @@ Install Julia - see [downloads](https://julialang.org/downloads/), and
51
  then instructions for [mac](https://julialang.org/downloads/platform/#macos)
52
  and [linux](https://julialang.org/downloads/platform/#linux_and_freebsd).
53
  (Don't use the `conda-forge` version; it doesn't seem to work properly.)
54
- Then, at the command line,
55
- install and precompile the `Optim` and `SpecialFunctions`
56
- packages via:
57
-
58
- ```bash
59
- julia -e 'using Pkg; pkg"add Optim; add SpecialFunctions; precompile;"'
60
- ```
61
-
62
- For python, you need to have Python 3, numpy, sympy, and pandas installed.
63
-
64
- You can install this package from PyPI with:
65
 
 
66
  ```bash
67
  pip install pysr
68
  ```
69
 
 
 
 
70
  # Quickstart
71
 
72
  Here is some demo code (also found in `example.py`)
 
14
  [Documentation](https://pysr.readthedocs.io/en/latest)
15
 
16
  Check out [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl) for
17
+ the pure-Julia backend of this package.
18
 
19
  Symbolic regression is a very interpretable machine learning algorithm
20
  for low-dimensional problems: these tools search equation space
 
51
  then instructions for [mac](https://julialang.org/downloads/platform/#macos)
52
  and [linux](https://julialang.org/downloads/platform/#linux_and_freebsd).
53
  (Don't use the `conda-forge` version; it doesn't seem to work properly.)
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ You can install PySR with:
56
  ```bash
57
  pip install pysr
58
  ```
59
 
60
+ The first launch will automatically install the Julia packages
61
+ required.
62
+
63
  # Quickstart
64
 
65
  Here is some demo code (also found in `example.py`)
TODO.md CHANGED
@@ -62,6 +62,8 @@
62
  - [x] Sympy printing
63
  - [ ] Sort these todo lists by priority
64
 
 
 
65
  ## Feature ideas
66
 
67
  - [ ] Other default losses (e.g., abs, other likelihoods, or just allow user to pass this as a string).
 
62
  - [x] Sympy printing
63
  - [ ] Sort these todo lists by priority
64
 
65
+ - [ ] Automatically convert log, log10, log2, pow to the correct operators.
66
+
67
  ## Feature ideas
68
 
69
  - [ ] Other default losses (e.g., abs, other likelihoods, or just allow user to pass this as a string).
julia/CheckConstraints.jl DELETED
@@ -1,42 +0,0 @@
1
- # Check if any binary operators are overly complex
2
- function flagBinOperatorComplexity(tree::Node, op::Int)::Bool
3
- if tree.degree == 0
4
- return false
5
- elseif tree.degree == 1
6
- return flagBinOperatorComplexity(tree.l, op)
7
- else
8
- if tree.op == op
9
- overly_complex = (
10
- ((bin_constraints[op][1] > -1) &&
11
- (countNodes(tree.l) > bin_constraints[op][1]))
12
- ||
13
- ((bin_constraints[op][2] > -1) &&
14
- (countNodes(tree.r) > bin_constraints[op][2]))
15
- )
16
- if overly_complex
17
- return true
18
- end
19
- end
20
- return (flagBinOperatorComplexity(tree.l, op) || flagBinOperatorComplexity(tree.r, op))
21
- end
22
- end
23
-
24
- # Check if any unary operators are overly complex
25
- function flagUnaOperatorComplexity(tree::Node, op::Int)::Bool
26
- if tree.degree == 0
27
- return false
28
- elseif tree.degree == 1
29
- if tree.op == op
30
- overly_complex = (
31
- (una_constraints[op] > -1) &&
32
- (countNodes(tree.l) > una_constraints[op])
33
- )
34
- if overly_complex
35
- return true
36
- end
37
- end
38
- return flagUnaOperatorComplexity(tree.l, op)
39
- else
40
- return (flagUnaOperatorComplexity(tree.l, op) || flagUnaOperatorComplexity(tree.r, op))
41
- end
42
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/ConstantOptimization.jl DELETED
@@ -1,49 +0,0 @@
1
- import Optim
2
-
3
- # Proxy function for optimization
4
- function optFunc(x::Array{Float32, 1}, tree::Node)::Float32
5
- setConstants(tree, x)
6
- return scoreFunc(tree)
7
- end
8
-
9
- # Use Nelder-Mead to optimize the constants in an equation
10
- function optimizeConstants(member::PopMember)::PopMember
11
- nconst = countConstants(member.tree)
12
- if nconst == 0
13
- return member
14
- end
15
- x0 = getConstants(member.tree)
16
- f(x::Array{Float32,1})::Float32 = optFunc(x, member.tree)
17
- if size(x0)[1] == 1
18
- algorithm = Optim.Newton
19
- else
20
- algorithm = Optim.NelderMead
21
- end
22
-
23
- try
24
- result = Optim.optimize(f, x0, algorithm(), Optim.Options(iterations=100))
25
- # Try other initial conditions:
26
- for i=1:nrestarts
27
- tmpresult = Optim.optimize(f, x0 .* (1f0 .+ 5f-1*randn(Float32, size(x0)[1])), algorithm(), Optim.Options(iterations=100))
28
- if tmpresult.minimum < result.minimum
29
- result = tmpresult
30
- end
31
- end
32
-
33
- if Optim.converged(result)
34
- setConstants(member.tree, result.minimizer)
35
- member.score = convert(Float32, result.minimum)
36
- member.birth = getTime()
37
- else
38
- setConstants(member.tree, x0)
39
- end
40
- catch error
41
- # Fine if optimization encountered domain error, just return x0
42
- if isa(error, AssertionError)
43
- setConstants(member.tree, x0)
44
- else
45
- throw(error)
46
- end
47
- end
48
- return member
49
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/Equation.jl DELETED
@@ -1,173 +0,0 @@
1
- # Define a serialization format for the symbolic equations:
2
- mutable struct Node
3
- #Holds operators, variables, constants in a tree
4
- degree::Integer #0 for constant/variable, 1 for cos/sin, 2 for +/* etc.
5
- val::Union{Float32, Integer} #Either const value, or enumerates variable
6
- constant::Bool #false if variable
7
- op::Integer #enumerates operator (separately for degree=1,2)
8
- l::Union{Node, Nothing}
9
- r::Union{Node, Nothing}
10
-
11
- Node(val::Float32) = new(0, val, true, 1, nothing, nothing)
12
- Node(val::Integer) = new(0, val, false, 1, nothing, nothing)
13
- Node(op::Integer, l::Node) = new(1, 0.0f0, false, op, l, nothing)
14
- Node(op::Integer, l::Union{Float32, Integer}) = new(1, 0.0f0, false, op, Node(l), nothing)
15
- Node(op::Integer, l::Node, r::Node) = new(2, 0.0f0, false, op, l, r)
16
-
17
- #Allow to pass the leaf value without additional node call:
18
- Node(op::Integer, l::Union{Float32, Integer}, r::Node) = new(2, 0.0f0, false, op, Node(l), r)
19
- Node(op::Integer, l::Node, r::Union{Float32, Integer}) = new(2, 0.0f0, false, op, l, Node(r))
20
- Node(op::Integer, l::Union{Float32, Integer}, r::Union{Float32, Integer}) = new(2, 0.0f0, false, op, Node(l), Node(r))
21
- end
22
-
23
- # Copy an equation (faster than deepcopy)
24
- function copyNode(tree::Node)::Node
25
- if tree.degree == 0
26
- return Node(tree.val)
27
- elseif tree.degree == 1
28
- return Node(tree.op, copyNode(tree.l))
29
- else
30
- return Node(tree.op, copyNode(tree.l), copyNode(tree.r))
31
- end
32
- end
33
-
34
- # Count the operators, constants, variables in an equation
35
- function countNodes(tree::Node)::Integer
36
- if tree.degree == 0
37
- return 1
38
- elseif tree.degree == 1
39
- return 1 + countNodes(tree.l)
40
- else
41
- return 1 + countNodes(tree.l) + countNodes(tree.r)
42
- end
43
- end
44
-
45
- # Count the max depth of a tree
46
- function countDepth(tree::Node)::Integer
47
- if tree.degree == 0
48
- return 1
49
- elseif tree.degree == 1
50
- return 1 + countDepth(tree.l)
51
- else
52
- return 1 + max(countDepth(tree.l), countDepth(tree.r))
53
- end
54
- end
55
-
56
- # Convert an equation to a string
57
- function stringTree(tree::Node)::String
58
- if tree.degree == 0
59
- if tree.constant
60
- return string(tree.val)
61
- else
62
- if useVarMap
63
- return varMap[tree.val]
64
- else
65
- return "x$(tree.val - 1)"
66
- end
67
- end
68
- elseif tree.degree == 1
69
- return "$(unaops[tree.op])($(stringTree(tree.l)))"
70
- else
71
- return "$(binops[tree.op])($(stringTree(tree.l)), $(stringTree(tree.r)))"
72
- end
73
- end
74
-
75
- # Print an equation
76
- function printTree(tree::Node)
77
- println(stringTree(tree))
78
- end
79
-
80
- # Return a random node from the tree
81
- function randomNode(tree::Node)::Node
82
- if tree.degree == 0
83
- return tree
84
- end
85
- a = countNodes(tree)
86
- b = 0
87
- c = 0
88
- if tree.degree >= 1
89
- b = countNodes(tree.l)
90
- end
91
- if tree.degree == 2
92
- c = countNodes(tree.r)
93
- end
94
-
95
- i = rand(1:1+b+c)
96
- if i <= b
97
- return randomNode(tree.l)
98
- elseif i == b + 1
99
- return tree
100
- end
101
-
102
- return randomNode(tree.r)
103
- end
104
-
105
- # Count the number of unary operators in the equation
106
- function countUnaryOperators(tree::Node)::Integer
107
- if tree.degree == 0
108
- return 0
109
- elseif tree.degree == 1
110
- return 1 + countUnaryOperators(tree.l)
111
- else
112
- return 0 + countUnaryOperators(tree.l) + countUnaryOperators(tree.r)
113
- end
114
- end
115
-
116
- # Count the number of binary operators in the equation
117
- function countBinaryOperators(tree::Node)::Integer
118
- if tree.degree == 0
119
- return 0
120
- elseif tree.degree == 1
121
- return 0 + countBinaryOperators(tree.l)
122
- else
123
- return 1 + countBinaryOperators(tree.l) + countBinaryOperators(tree.r)
124
- end
125
- end
126
-
127
- # Count the number of operators in the equation
128
- function countOperators(tree::Node)::Integer
129
- return countUnaryOperators(tree) + countBinaryOperators(tree)
130
- end
131
-
132
-
133
- # Count the number of constants in an equation
134
- function countConstants(tree::Node)::Integer
135
- if tree.degree == 0
136
- return convert(Integer, tree.constant)
137
- elseif tree.degree == 1
138
- return 0 + countConstants(tree.l)
139
- else
140
- return 0 + countConstants(tree.l) + countConstants(tree.r)
141
- end
142
- end
143
-
144
- # Get all the constants from a tree
145
- function getConstants(tree::Node)::Array{Float32, 1}
146
- if tree.degree == 0
147
- if tree.constant
148
- return [tree.val]
149
- else
150
- return Float32[]
151
- end
152
- elseif tree.degree == 1
153
- return getConstants(tree.l)
154
- else
155
- both = [getConstants(tree.l), getConstants(tree.r)]
156
- return [constant for subtree in both for constant in subtree]
157
- end
158
- end
159
-
160
- # Set all the constants inside a tree
161
- function setConstants(tree::Node, constants::Array{Float32, 1})
162
- if tree.degree == 0
163
- if tree.constant
164
- tree.val = constants[1]
165
- end
166
- elseif tree.degree == 1
167
- setConstants(tree.l, constants)
168
- else
169
- numberLeft = countConstants(tree.l)
170
- setConstants(tree.l, constants)
171
- setConstants(tree.r, constants[numberLeft+1:end])
172
- end
173
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/EvaluateEquation.jl DELETED
@@ -1,47 +0,0 @@
1
- # Evaluate an equation over an array of datapoints
2
- function evalTreeArray(tree::Node)::Union{Array{Float32, 1}, Nothing}
3
- return evalTreeArray(tree, X)
4
- end
5
-
6
-
7
- # Evaluate an equation over an array of datapoints
8
- function evalTreeArray(tree::Node, cX::Array{Float32, 2})::Union{Array{Float32, 1}, Nothing}
9
- clen = size(cX)[1]
10
- if tree.degree == 0
11
- if tree.constant
12
- return fill(tree.val, clen)
13
- else
14
- return copy(cX[:, tree.val])
15
- end
16
- elseif tree.degree == 1
17
- cumulator = evalTreeArray(tree.l, cX)
18
- if cumulator === nothing
19
- return nothing
20
- end
21
- op_idx = tree.op
22
- UNAOP!(cumulator, op_idx, clen)
23
- @inbounds for i=1:clen
24
- if isinf(cumulator[i]) || isnan(cumulator[i])
25
- return nothing
26
- end
27
- end
28
- return cumulator
29
- else
30
- cumulator = evalTreeArray(tree.l, cX)
31
- if cumulator === nothing
32
- return nothing
33
- end
34
- array2 = evalTreeArray(tree.r, cX)
35
- if array2 === nothing
36
- return nothing
37
- end
38
- op_idx = tree.op
39
- BINOP!(cumulator, array2, op_idx, clen)
40
- @inbounds for i=1:clen
41
- if isinf(cumulator[i]) || isnan(cumulator[i])
42
- return nothing
43
- end
44
- end
45
- return cumulator
46
- end
47
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/LossFunctions.jl DELETED
@@ -1,82 +0,0 @@
1
- import Random: randperm
2
-
3
- # Sum of square error between two arrays
4
- function SSE(x::Array{Float32}, y::Array{Float32})::Float32
5
- diff = (x - y)
6
- return sum(diff .* diff)
7
- end
8
- function SSE(x::Nothing, y::Array{Float32})::Float32
9
- return 1f9
10
- end
11
-
12
- # Sum of square error between two arrays, with weights
13
- function SSE(x::Array{Float32}, y::Array{Float32}, w::Array{Float32})::Float32
14
- diff = (x - y)
15
- return sum(diff .* diff .* w)
16
- end
17
- function SSE(x::Nothing, y::Array{Float32}, w::Array{Float32})::Float32
18
- return Nothing
19
- end
20
-
21
- # Mean of square error between two arrays
22
- function MSE(x::Nothing, y::Array{Float32})::Float32
23
- return 1f9
24
- end
25
-
26
- # Mean of square error between two arrays
27
- function MSE(x::Array{Float32}, y::Array{Float32})::Float32
28
- return SSE(x, y)/size(x)[1]
29
- end
30
-
31
- # Mean of square error between two arrays
32
- function MSE(x::Nothing, y::Array{Float32}, w::Array{Float32})::Float32
33
- return 1f9
34
- end
35
-
36
- # Mean of square error between two arrays
37
- function MSE(x::Array{Float32}, y::Array{Float32}, w::Array{Float32})::Float32
38
- return SSE(x, y, w)/sum(w)
39
- end
40
-
41
- if weighted
42
- const avgy = sum(y .* weights)/sum(weights)
43
- const baselineMSE = MSE(y, convert(Array{Float32, 1}, ones(len) .* avgy), weights)
44
- else
45
- const avgy = sum(y)/len
46
- const baselineMSE = MSE(y, convert(Array{Float32, 1}, ones(len) .* avgy))
47
- end
48
-
49
- # Score an equation
50
- function scoreFunc(tree::Node)::Float32
51
- prediction = evalTreeArray(tree)
52
- if prediction === nothing
53
- return 1f9
54
- end
55
- if weighted
56
- mse = MSE(prediction, y, weights)
57
- else
58
- mse = MSE(prediction, y)
59
- end
60
- return mse / baselineMSE + countNodes(tree)*parsimony
61
- end
62
-
63
- # Score an equation with a small batch
64
- function scoreFuncBatch(tree::Node)::Float32
65
- # batchSize
66
- batch_idx = randperm(len)[1:batchSize]
67
- batch_X = X[batch_idx, :]
68
- prediction = evalTreeArray(tree, batch_X)
69
- if prediction === nothing
70
- return 1f9
71
- end
72
- size_adjustment = 1f0
73
- batch_y = y[batch_idx]
74
- if weighted
75
- batch_w = weights[batch_idx]
76
- mse = MSE(prediction, batch_y, batch_w)
77
- size_adjustment = 1f0 * len / batchSize
78
- else
79
- mse = MSE(prediction, batch_y)
80
- end
81
- return size_adjustment * mse / baselineMSE + countNodes(tree)*parsimony
82
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/Mutate.jl DELETED
@@ -1,124 +0,0 @@
1
- # Go through one mutation cycle
2
- function iterate(member::PopMember, T::Float32, curmaxsize::Integer, frequencyComplexity::Array{Float32, 1})::PopMember
3
- prev = member.tree
4
- tree = prev
5
- #TODO - reconsider this
6
- if batching
7
- beforeLoss = scoreFuncBatch(prev)
8
- else
9
- beforeLoss = member.score
10
- end
11
-
12
- mutationChoice = rand()
13
- #More constants => more likely to do constant mutation
14
- weightAdjustmentMutateConstant = min(8, countConstants(prev))/8.0
15
- cur_weights = copy(mutationWeights) .* 1.0
16
- cur_weights[1] *= weightAdjustmentMutateConstant
17
- n = countNodes(prev)
18
- depth = countDepth(prev)
19
-
20
- # If equation too big, don't add new operators
21
- if n >= curmaxsize || depth >= maxdepth
22
- cur_weights[3] = 0.0
23
- cur_weights[4] = 0.0
24
- end
25
- cur_weights /= sum(cur_weights)
26
- cweights = cumsum(cur_weights)
27
-
28
- successful_mutation = false
29
- #TODO: Currently we dont take this \/ into account
30
- is_success_always_possible = true
31
- attempts = 0
32
- max_attempts = 10
33
-
34
- #############################################
35
- # Mutations
36
- #############################################
37
- while (!successful_mutation) && attempts < max_attempts
38
- tree = copyNode(prev)
39
- successful_mutation = true
40
- if mutationChoice < cweights[1]
41
- tree = mutateConstant(tree, T)
42
-
43
- is_success_always_possible = true
44
- # Mutating a constant shouldn't invalidate an already-valid function
45
-
46
- elseif mutationChoice < cweights[2]
47
- tree = mutateOperator(tree)
48
-
49
- is_success_always_possible = true
50
- # Can always mutate to the same operator
51
-
52
- elseif mutationChoice < cweights[3]
53
- if rand() < 0.5
54
- tree = appendRandomOp(tree)
55
- else
56
- tree = prependRandomOp(tree)
57
- end
58
- is_success_always_possible = false
59
- # Can potentially have a situation without success
60
- elseif mutationChoice < cweights[4]
61
- tree = insertRandomOp(tree)
62
- is_success_always_possible = false
63
- elseif mutationChoice < cweights[5]
64
- tree = deleteRandomOp(tree)
65
- is_success_always_possible = true
66
- elseif mutationChoice < cweights[6]
67
- tree = simplifyTree(tree) # Sometimes we simplify tree
68
- tree = combineOperators(tree) # See if repeated constants at outer levels
69
- return PopMember(tree, beforeLoss)
70
-
71
- is_success_always_possible = true
72
- # Simplification shouldn't hurt complexity; unless some non-symmetric constraint
73
- # to commutative operator...
74
-
75
- elseif mutationChoice < cweights[7]
76
- tree = genRandomTree(5) # Sometimes we generate a new tree completely tree
77
-
78
- is_success_always_possible = true
79
- else # no mutation applied
80
- return PopMember(tree, beforeLoss)
81
- end
82
-
83
- # Check for illegal equations
84
- for i=1:nbin
85
- if successful_mutation && flagBinOperatorComplexity(tree, i)
86
- successful_mutation = false
87
- end
88
- end
89
- for i=1:nuna
90
- if successful_mutation && flagUnaOperatorComplexity(tree, i)
91
- successful_mutation = false
92
- end
93
- end
94
-
95
- attempts += 1
96
- end
97
- #############################################
98
-
99
- if !successful_mutation
100
- return PopMember(copyNode(prev), beforeLoss)
101
- end
102
-
103
- if batching
104
- afterLoss = scoreFuncBatch(tree)
105
- else
106
- afterLoss = scoreFunc(tree)
107
- end
108
-
109
- if annealing
110
- delta = afterLoss - beforeLoss
111
- probChange = exp(-delta/(T*alpha))
112
- if useFrequency
113
- oldSize = countNodes(prev)
114
- newSize = countNodes(tree)
115
- probChange *= frequencyComplexity[oldSize] / frequencyComplexity[newSize]
116
- end
117
-
118
- return_unaltered = (isnan(afterLoss) || probChange < rand())
119
- if return_unaltered
120
- return PopMember(copyNode(prev), beforeLoss)
121
- end
122
- end
123
- return PopMember(tree, afterLoss)
124
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/MutationFunctions.jl DELETED
@@ -1,239 +0,0 @@
1
- # Randomly convert an operator into another one (binary->binary;
2
- # unary->unary)
3
- function mutateOperator(tree::Node)::Node
4
- if countOperators(tree) == 0
5
- return tree
6
- end
7
- node = randomNode(tree)
8
- while node.degree == 0
9
- node = randomNode(tree)
10
- end
11
- if node.degree == 1
12
- node.op = rand(1:length(unaops))
13
- else
14
- node.op = rand(1:length(binops))
15
- end
16
- return tree
17
- end
18
-
19
- # Randomly perturb a constant
20
- function mutateConstant(
21
- tree::Node, T::Float32,
22
- probNegate::Float32=0.01f0)::Node
23
- # T is between 0 and 1.
24
-
25
- if countConstants(tree) == 0
26
- return tree
27
- end
28
- node = randomNode(tree)
29
- while node.degree != 0 || node.constant == false
30
- node = randomNode(tree)
31
- end
32
-
33
- bottom = 0.1f0
34
- maxChange = perturbationFactor * T + 1.0f0 + bottom
35
- factor = maxChange^Float32(rand())
36
- makeConstBigger = rand() > 0.5
37
-
38
- if makeConstBigger
39
- node.val *= factor
40
- else
41
- node.val /= factor
42
- end
43
-
44
- if rand() > probNegate
45
- node.val *= -1
46
- end
47
-
48
- return tree
49
- end
50
-
51
- # Add a random unary/binary operation to the end of a tree
52
- function appendRandomOp(tree::Node)::Node
53
- node = randomNode(tree)
54
- while node.degree != 0
55
- node = randomNode(tree)
56
- end
57
-
58
- choice = rand()
59
- makeNewBinOp = choice < nbin/nops
60
- if rand() > 0.5
61
- left = Float32(randn())
62
- else
63
- left = rand(1:nvar)
64
- end
65
- if rand() > 0.5
66
- right = Float32(randn())
67
- else
68
- right = rand(1:nvar)
69
- end
70
-
71
- if makeNewBinOp
72
- newnode = Node(
73
- rand(1:length(binops)),
74
- left,
75
- right
76
- )
77
- else
78
- newnode = Node(
79
- rand(1:length(unaops)),
80
- left
81
- )
82
- end
83
- node.l = newnode.l
84
- node.r = newnode.r
85
- node.op = newnode.op
86
- node.degree = newnode.degree
87
- node.val = newnode.val
88
- node.constant = newnode.constant
89
- return tree
90
- end
91
-
92
- # Insert random node
93
- function insertRandomOp(tree::Node)::Node
94
- node = randomNode(tree)
95
- choice = rand()
96
- makeNewBinOp = choice < nbin/nops
97
- left = copyNode(node)
98
-
99
- if makeNewBinOp
100
- right = randomConstantNode()
101
- newnode = Node(
102
- rand(1:length(binops)),
103
- left,
104
- right
105
- )
106
- else
107
- newnode = Node(
108
- rand(1:length(unaops)),
109
- left
110
- )
111
- end
112
- node.l = newnode.l
113
- node.r = newnode.r
114
- node.op = newnode.op
115
- node.degree = newnode.degree
116
- node.val = newnode.val
117
- node.constant = newnode.constant
118
- return tree
119
- end
120
-
121
- # Add random node to the top of a tree
122
- function prependRandomOp(tree::Node)::Node
123
- node = tree
124
- choice = rand()
125
- makeNewBinOp = choice < nbin/nops
126
- left = copyNode(tree)
127
-
128
- if makeNewBinOp
129
- right = randomConstantNode()
130
- newnode = Node(
131
- rand(1:length(binops)),
132
- left,
133
- right
134
- )
135
- else
136
- newnode = Node(
137
- rand(1:length(unaops)),
138
- left
139
- )
140
- end
141
- node.l = newnode.l
142
- node.r = newnode.r
143
- node.op = newnode.op
144
- node.degree = newnode.degree
145
- node.val = newnode.val
146
- node.constant = newnode.constant
147
- return node
148
- end
149
-
150
- function randomConstantNode()::Node
151
- if rand() > 0.5
152
- val = Float32(randn())
153
- else
154
- val = rand(1:nvar)
155
- end
156
- newnode = Node(val)
157
- return newnode
158
- end
159
-
160
- # Return a random node from the tree with parent
161
- function randomNodeAndParent(tree::Node, parent::Union{Node, Nothing})::Tuple{Node, Union{Node, Nothing}}
162
- if tree.degree == 0
163
- return tree, parent
164
- end
165
- a = countNodes(tree)
166
- b = 0
167
- c = 0
168
- if tree.degree >= 1
169
- b = countNodes(tree.l)
170
- end
171
- if tree.degree == 2
172
- c = countNodes(tree.r)
173
- end
174
-
175
- i = rand(1:1+b+c)
176
- if i <= b
177
- return randomNodeAndParent(tree.l, tree)
178
- elseif i == b + 1
179
- return tree, parent
180
- end
181
-
182
- return randomNodeAndParent(tree.r, tree)
183
- end
184
-
185
- # Select a random node, and replace it and the subtree
186
- # with a variable or constant
187
- function deleteRandomOp(tree::Node)::Node
188
- node, parent = randomNodeAndParent(tree, nothing)
189
- isroot = (parent === nothing)
190
-
191
- if node.degree == 0
192
- # Replace with new constant
193
- newnode = randomConstantNode()
194
- node.l = newnode.l
195
- node.r = newnode.r
196
- node.op = newnode.op
197
- node.degree = newnode.degree
198
- node.val = newnode.val
199
- node.constant = newnode.constant
200
- elseif node.degree == 1
201
- # Join one of the children with the parent
202
- if isroot
203
- return node.l
204
- elseif parent.l == node
205
- parent.l = node.l
206
- else
207
- parent.r = node.l
208
- end
209
- else
210
- # Join one of the children with the parent
211
- if rand() < 0.5
212
- if isroot
213
- return node.l
214
- elseif parent.l == node
215
- parent.l = node.l
216
- else
217
- parent.r = node.l
218
- end
219
- else
220
- if isroot
221
- return node.r
222
- elseif parent.l == node
223
- parent.l = node.r
224
- else
225
- parent.r = node.r
226
- end
227
- end
228
- end
229
- return tree
230
- end
231
-
232
- # Create a random equation by appending random operators
233
- function genRandomTree(length::Integer)::Node
234
- tree = Node(1.0f0)
235
- for i=1:length
236
- tree = appendRandomOp(tree)
237
- end
238
- return tree
239
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/Operators.jl DELETED
@@ -1,56 +0,0 @@
1
- import SpecialFunctions: gamma, lgamma, erf, erfc, beta
2
-
3
-
4
- import Base.FastMath: sqrt_llvm_fast, neg_float_fast,
5
- add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast,
6
- eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast,
7
- sign_fast, abs_fast, log_fast, log2_fast, log10_fast, sqrt_fast,
8
- pow_fast
9
-
10
- # Implicitly defined:
11
- #binary: mod
12
- #unary: exp, abs, log1p, sin, cos, tan, sinh, cosh, tanh, asin, acos, atan, asinh, acosh, atanh, erf, erfc, gamma, relu, round, floor, ceil, round, sign.
13
-
14
- # Use some fast operators from https://github.com/JuliaLang/julia/blob/81597635c4ad1e8c2e1c5753fda4ec0e7397543f/base/fastmath.jl
15
- # Define allowed operators. Any julia operator can also be used.
16
- plus(x::Float32, y::Float32)::Float32 = add_float_fast(x, y) #Do not change the name of this operator.
17
- sub(x::Float32, y::Float32)::Float32 = sub_float_fast(x, y) #Do not change the name of this operator.
18
- mult(x::Float32, y::Float32)::Float32 = mul_float_fast(x, y) #Do not change the name of this operator.
19
- square(x::Float32)::Float32 = mul_float_fast(x, x)
20
- cube(x::Float32)::Float32 = mul_float_fast(mul_float_fast(x, x), x)
21
- pow(x::Float32, y::Float32)::Float32 = sign_fast(x)*pow_fast(abs(x), y)
22
- div(x::Float32, y::Float32)::Float32 = div_float_fast(x, y)
23
- logm(x::Float32)::Float32 = log_fast(abs_fast(x) + 1f-8)
24
- logm2(x::Float32)::Float32 = log2_fast(abs_fast(x) + 1f-8)
25
- logm10(x::Float32)::Float32 = log10_fast(abs_fast(x) + 1f-8)
26
- sqrtm(x::Float32)::Float32 = sqrt_fast(abs_fast(x))
27
- neg(x::Float32)::Float32 = neg_float_fast(x)
28
-
29
- function greater(x::Float32, y::Float32)::Float32
30
- if x > y
31
- return 1f0
32
- end
33
- return 0f0
34
- end
35
-
36
- function relu(x::Float32)::Float32
37
- if x > 0f0
38
- return x
39
- end
40
- return 0f0
41
- end
42
-
43
- function logical_or(x::Float32, y::Float32)::Float32
44
- if x > 0f0 || y > 0f0
45
- return 1f0
46
- end
47
- return 0f0
48
- end
49
-
50
- # (Just use multiplication normally)
51
- function logical_and(x::Float32, y::Float32)::Float32
52
- if x > 0f0 && y > 0f0
53
- return 1f0
54
- end
55
- return 0f0
56
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/PopMember.jl DELETED
@@ -1,10 +0,0 @@
1
- # Define a member of population by equation, score, and age
2
- mutable struct PopMember
3
- tree::Node
4
- score::Float32
5
- birth::Integer
6
-
7
- PopMember(t::Node) = new(t, scoreFunc(t), getTime())
8
- PopMember(t::Node, score::Float32) = new(t, score, getTime())
9
-
10
- end
 
 
 
 
 
 
 
 
 
 
 
julia/Population.jl DELETED
@@ -1,40 +0,0 @@
1
- # A list of members of the population, with easy constructors,
2
- # which allow for random generation of new populations
3
- mutable struct Population
4
- members::Array{PopMember, 1}
5
- n::Integer
6
-
7
- Population(pop::Array{PopMember, 1}) = new(pop, size(pop)[1])
8
- Population(npop::Integer) = new([PopMember(genRandomTree(3)) for i=1:npop], npop)
9
- Population(npop::Integer, nlength::Integer) = new([PopMember(genRandomTree(nlength)) for i=1:npop], npop)
10
-
11
- end
12
-
13
- # Sample 10 random members of the population, and make a new one
14
- function samplePop(pop::Population)::Population
15
- idx = rand(1:pop.n, ns)
16
- return Population(pop.members[idx])
17
- end
18
-
19
- # Sample the population, and get the best member from that sample
20
- function bestOfSample(pop::Population)::PopMember
21
- sample = samplePop(pop)
22
- best_idx = argmin([sample.members[member].score for member=1:sample.n])
23
- return sample.members[best_idx]
24
- end
25
-
26
- function finalizeScores(pop::Population)::Population
27
- need_recalculate = batching
28
- if need_recalculate
29
- @inbounds @simd for member=1:pop.n
30
- pop.members[member].score = scoreFunc(pop.members[member].tree)
31
- end
32
- end
33
- return pop
34
- end
35
-
36
- # Return best 10 examples
37
- function bestSubPop(pop::Population; topn::Integer=10)::Population
38
- best_idx = sortperm([pop.members[member].score for member=1:pop.n])
39
- return Population(pop.members[best_idx[1:topn]])
40
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/ProgramConstants.jl DELETED
@@ -1,9 +0,0 @@
1
-
2
- const maxdegree = 2
3
- const actualMaxsize = maxsize + maxdegree
4
- const len = size(X)[1]
5
-
6
- const nuna = size(unaops)[1]
7
- const nbin = size(binops)[1]
8
- const nops = nuna + nbin
9
- const nvar = size(X)[2];
 
 
 
 
 
 
 
 
 
 
julia/RegularizedEvolution.jl DELETED
@@ -1,46 +0,0 @@
1
- import Random: shuffle!
2
-
3
- # Pass through the population several times, replacing the oldest
4
- # with the fittest of a small subsample
5
- function regEvolCycle(pop::Population, T::Float32, curmaxsize::Integer,
6
- frequencyComplexity::Array{Float32, 1})::Population
7
- # Batch over each subsample. Can give 15% improvement in speed; probably moreso for large pops.
8
- # but is ultimately a different algorithm than regularized evolution, and might not be
9
- # as good.
10
- if fast_cycle
11
- shuffle!(pop.members)
12
- n_evol_cycles = round(Integer, pop.n/ns)
13
- babies = Array{PopMember}(undef, n_evol_cycles)
14
-
15
- # Iterate each ns-member sub-sample
16
- @inbounds Threads.@threads for i=1:n_evol_cycles
17
- best_score = Inf32
18
- best_idx = 1+(i-1)*ns
19
- # Calculate best member of the subsample:
20
- for sub_i=1+(i-1)*ns:i*ns
21
- if pop.members[sub_i].score < best_score
22
- best_score = pop.members[sub_i].score
23
- best_idx = sub_i
24
- end
25
- end
26
- allstar = pop.members[best_idx]
27
- babies[i] = iterate(allstar, T, curmaxsize, frequencyComplexity)
28
- end
29
-
30
- # Replace the n_evol_cycles-oldest members of each population
31
- @inbounds for i=1:n_evol_cycles
32
- oldest = argmin([pop.members[member].birth for member=1:pop.n])
33
- pop.members[oldest] = babies[i]
34
- end
35
- else
36
- for i=1:round(Integer, pop.n/ns)
37
- allstar = bestOfSample(pop)
38
- baby = iterate(allstar, T, curmaxsize, frequencyComplexity)
39
- #printTree(baby.tree)
40
- oldest = argmin([pop.members[member].birth for member=1:pop.n])
41
- pop.members[oldest] = baby
42
- end
43
- end
44
-
45
- return pop
46
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/SimplifyEquation.jl DELETED
@@ -1,106 +0,0 @@
1
- # Simplify tree
2
- function combineOperators(tree::Node)::Node
3
- # NOTE: (const (+*-) const) already accounted for. Call simplifyTree before.
4
- # ((const + var) + const) => (const + var)
5
- # ((const * var) * const) => (const * var)
6
- # ((const - var) - const) => (const - var)
7
- # (want to add anything commutative!)
8
- # TODO - need to combine plus/sub if they are both there.
9
- if tree.degree == 0
10
- return tree
11
- elseif tree.degree == 1
12
- tree.l = combineOperators(tree.l)
13
- elseif tree.degree == 2
14
- tree.l = combineOperators(tree.l)
15
- tree.r = combineOperators(tree.r)
16
- end
17
-
18
- top_level_constant = tree.degree == 2 && (tree.l.constant || tree.r.constant)
19
- if tree.degree == 2 && (binops[tree.op] === mult || binops[tree.op] === plus) && top_level_constant
20
- op = tree.op
21
- # Put the constant in r. Need to assume var in left for simplification assumption.
22
- if tree.l.constant
23
- tmp = tree.r
24
- tree.r = tree.l
25
- tree.l = tmp
26
- end
27
- topconstant = tree.r.val
28
- # Simplify down first
29
- below = tree.l
30
- if below.degree == 2 && below.op == op
31
- if below.l.constant
32
- tree = below
33
- tree.l.val = binops[op](tree.l.val, topconstant)
34
- elseif below.r.constant
35
- tree = below
36
- tree.r.val = binops[op](tree.r.val, topconstant)
37
- end
38
- end
39
- end
40
-
41
- if tree.degree == 2 && binops[tree.op] === sub && top_level_constant
42
- # Currently just simplifies subtraction. (can't assume both plus and sub are operators)
43
- # Not commutative, so use different op.
44
- if tree.l.constant
45
- if tree.r.degree == 2 && binops[tree.r.op] === sub
46
- if tree.r.l.constant
47
- #(const - (const - var)) => (var - const)
48
- l = tree.l
49
- r = tree.r
50
- simplified_const = -(l.val - r.l.val) #neg(sub(l.val, r.l.val))
51
- tree.l = tree.r.r
52
- tree.r = l
53
- tree.r.val = simplified_const
54
- elseif tree.r.r.constant
55
- #(const - (var - const)) => (const - var)
56
- l = tree.l
57
- r = tree.r
58
- simplified_const = l.val + r.r.val #plus(l.val, r.r.val)
59
- tree.r = tree.r.l
60
- tree.l.val = simplified_const
61
- end
62
- end
63
- else #tree.r.constant is true
64
- if tree.l.degree == 2 && binops[tree.l.op] === sub
65
- if tree.l.l.constant
66
- #((const - var) - const) => (const - var)
67
- l = tree.l
68
- r = tree.r
69
- simplified_const = l.l.val - r.val#sub(l.l.val, r.val)
70
- tree.r = tree.l.r
71
- tree.l = r
72
- tree.l.val = simplified_const
73
- elseif tree.l.r.constant
74
- #((var - const) - const) => (var - const)
75
- l = tree.l
76
- r = tree.r
77
- simplified_const = r.val + l.r.val #plus(r.val, l.r.val)
78
- tree.l = tree.l.l
79
- tree.r.val = simplified_const
80
- end
81
- end
82
- end
83
- end
84
- return tree
85
- end
86
-
87
- # Simplify tree
88
- function simplifyTree(tree::Node)::Node
89
- if tree.degree == 1
90
- tree.l = simplifyTree(tree.l)
91
- if tree.l.degree == 0 && tree.l.constant
92
- return Node(unaops[tree.op](tree.l.val))
93
- end
94
- elseif tree.degree == 2
95
- tree.l = simplifyTree(tree.l)
96
- tree.r = simplifyTree(tree.r)
97
- constantsBelow = (
98
- tree.l.degree == 0 && tree.l.constant &&
99
- tree.r.degree == 0 && tree.r.constant
100
- )
101
- if constantsBelow
102
- return Node(binops[tree.op](tree.l.val, tree.r.val))
103
- end
104
- end
105
- return tree
106
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/SingleIteration.jl DELETED
@@ -1,28 +0,0 @@
1
- # Cycle through regularized evolution many times,
2
- # printing the fittest equation every 10% through
3
- function run(
4
- pop::Population,
5
- ncycles::Integer,
6
- curmaxsize::Integer,
7
- frequencyComplexity::Array{Float32, 1};
8
- verbosity::Integer=0
9
- )::Population
10
-
11
- allT = LinRange(1.0f0, 0.0f0, ncycles)
12
- for iT in 1:size(allT)[1]
13
- if annealing
14
- pop = regEvolCycle(pop, allT[iT], curmaxsize, frequencyComplexity)
15
- else
16
- pop = regEvolCycle(pop, 1.0f0, curmaxsize, frequencyComplexity)
17
- end
18
-
19
- if verbosity > 0 && (iT % verbosity == 0)
20
- bestPops = bestSubPop(pop)
21
- bestCurScoreIdx = argmin([bestPops.members[member].score for member=1:bestPops.n])
22
- bestCurScore = bestPops.members[bestCurScoreIdx].score
23
- debug(verbosity, bestCurScore, " is the score for ", stringTree(bestPops.members[bestCurScoreIdx].tree))
24
- end
25
- end
26
-
27
- return pop
28
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/Utils.jl DELETED
@@ -1,34 +0,0 @@
1
- import Printf: @printf
2
-
3
- function id(x::Float32)::Float32
4
- x
5
- end
6
-
7
- function debug(verbosity, string...)
8
- verbosity > 0 ? println(string...) : nothing
9
- end
10
-
11
- function getTime()::Integer
12
- return round(Integer, 1e3*(time()-1.6e9))
13
- end
14
-
15
- # Check for errors before they happen
16
- function testConfiguration()
17
- test_input = LinRange(-100f0, 100f0, 99)
18
-
19
- try
20
- for left in test_input
21
- for right in test_input
22
- for binop in binops
23
- test_output = binop.(left, right)
24
- end
25
- end
26
- for unaop in unaops
27
- test_output = unaop.(left)
28
- end
29
- end
30
- catch error
31
- @printf("\n\nYour configuration is invalid - one of your operators is not well-defined over the real line.\n\n\n")
32
- throw(error)
33
- end
34
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/halloffame.jl DELETED
@@ -1,8 +0,0 @@
1
- # List of the best members seen all time
2
- mutable struct HallOfFame
3
- members::Array{PopMember, 1}
4
- exists::Array{Bool, 1} #Whether it has been set
5
-
6
- # Arranged by complexity - store one at each.
7
- HallOfFame() = new([PopMember(Node(1f0), 1f9) for i=1:actualMaxsize], [false for i=1:actualMaxsize])
8
- end
 
 
 
 
 
 
 
 
 
julia/sr.jl DELETED
@@ -1,218 +0,0 @@
1
- import Printf: @printf
2
-
3
- function fullRun(niterations::Integer;
4
- npop::Integer=300,
5
- ncyclesperiteration::Integer=3000,
6
- fractionReplaced::Float32=0.1f0,
7
- verbosity::Integer=0,
8
- topn::Integer=10
9
- )
10
-
11
- testConfiguration()
12
-
13
- # 1. Start a population on every process
14
- allPops = Future[]
15
- # Set up a channel to send finished populations back to head node
16
- channels = [RemoteChannel(1) for j=1:npopulations]
17
- bestSubPops = [Population(1) for j=1:npopulations]
18
- hallOfFame = HallOfFame()
19
- frequencyComplexity = ones(Float32, actualMaxsize)
20
- curmaxsize = 3
21
- if warmupMaxsize == 0
22
- curmaxsize = maxsize
23
- end
24
-
25
- for i=1:npopulations
26
- future = @spawnat :any Population(npop, 3)
27
- push!(allPops, future)
28
- end
29
-
30
- # # 2. Start the cycle on every process:
31
- @sync for i=1:npopulations
32
- @async allPops[i] = @spawnat :any run(fetch(allPops[i]), ncyclesperiteration, curmaxsize, copy(frequencyComplexity)/sum(frequencyComplexity), verbosity=verbosity)
33
- end
34
- println("Started!")
35
- cycles_complete = npopulations * niterations
36
- if warmupMaxsize != 0
37
- curmaxsize += 1
38
- if curmaxsize > maxsize
39
- curmaxsize = maxsize
40
- end
41
- end
42
-
43
- last_print_time = time()
44
- num_equations = 0.0
45
- print_every_n_seconds = 5
46
- equation_speed = Float32[]
47
-
48
- for i=1:npopulations
49
- # Start listening for each population to finish:
50
- @async put!(channels[i], fetch(allPops[i]))
51
- end
52
-
53
- while cycles_complete > 0
54
- @inbounds for i=1:npopulations
55
- # Non-blocking check if a population is ready:
56
- if isready(channels[i])
57
- # Take the fetch operation from the channel since its ready
58
- cur_pop = take!(channels[i])
59
- bestSubPops[i] = bestSubPop(cur_pop, topn=topn)
60
-
61
- #Try normal copy...
62
- bestPops = Population([member for pop in bestSubPops for member in pop.members])
63
-
64
- for member in cur_pop.members
65
- size = countNodes(member.tree)
66
- frequencyComplexity[size] += 1
67
- if member.score < hallOfFame.members[size].score
68
- hallOfFame.members[size] = deepcopy(member)
69
- hallOfFame.exists[size] = true
70
- end
71
- end
72
-
73
- # Dominating pareto curve - must be better than all simpler equations
74
- dominating = PopMember[]
75
- open(hofFile, "w") do io
76
- println(io,"Complexity|MSE|Equation")
77
- for size=1:actualMaxsize
78
- if hallOfFame.exists[size]
79
- member = hallOfFame.members[size]
80
- if weighted
81
- curMSE = MSE(evalTreeArray(member.tree), y, weights)
82
- else
83
- curMSE = MSE(evalTreeArray(member.tree), y)
84
- end
85
- numberSmallerAndBetter = 0
86
- for i=1:(size-1)
87
- if weighted
88
- hofMSE = MSE(evalTreeArray(hallOfFame.members[i].tree), y, weights)
89
- else
90
- hofMSE = MSE(evalTreeArray(hallOfFame.members[i].tree), y)
91
- end
92
- if (hallOfFame.exists[size] && curMSE > hofMSE)
93
- numberSmallerAndBetter += 1
94
- end
95
- end
96
- betterThanAllSmaller = (numberSmallerAndBetter == 0)
97
- if betterThanAllSmaller
98
- println(io, "$size|$(curMSE)|$(stringTree(member.tree))")
99
- push!(dominating, member)
100
- end
101
- end
102
- end
103
- end
104
- cp(hofFile, hofFile*".bkup", force=true)
105
-
106
- # Try normal copy otherwise.
107
- if migration
108
- for k in rand(1:npop, round(Integer, npop*fractionReplaced))
109
- to_copy = rand(1:size(bestPops.members)[1])
110
- cur_pop.members[k] = PopMember(
111
- copyNode(bestPops.members[to_copy].tree),
112
- bestPops.members[to_copy].score)
113
- end
114
- end
115
-
116
- if hofMigration && size(dominating)[1] > 0
117
- for k in rand(1:npop, round(Integer, npop*fractionReplacedHof))
118
- # Copy in case one gets used twice
119
- to_copy = rand(1:size(dominating)[1])
120
- cur_pop.members[k] = PopMember(
121
- copyNode(dominating[to_copy].tree)
122
- )
123
- end
124
- end
125
-
126
- @async begin
127
- allPops[i] = @spawnat :any let
128
- tmp_pop = run(cur_pop, ncyclesperiteration, curmaxsize, copy(frequencyComplexity)/sum(frequencyComplexity), verbosity=verbosity)
129
- @inbounds @simd for j=1:tmp_pop.n
130
- if rand() < 0.1
131
- tmp_pop.members[j].tree = simplifyTree(tmp_pop.members[j].tree)
132
- tmp_pop.members[j].tree = combineOperators(tmp_pop.members[j].tree)
133
- if shouldOptimizeConstants
134
- tmp_pop.members[j] = optimizeConstants(tmp_pop.members[j])
135
- end
136
- end
137
- end
138
- tmp_pop = finalizeScores(tmp_pop)
139
- tmp_pop
140
- end
141
- put!(channels[i], fetch(allPops[i]))
142
- end
143
-
144
- cycles_complete -= 1
145
- cycles_elapsed = npopulations * niterations - cycles_complete
146
- if warmupMaxsize != 0 && cycles_elapsed % warmupMaxsize == 0
147
- curmaxsize += 1
148
- if curmaxsize > maxsize
149
- curmaxsize = maxsize
150
- end
151
- end
152
- num_equations += ncyclesperiteration * npop / 10.0
153
- end
154
- end
155
- sleep(1e-3)
156
- elapsed = time() - last_print_time
157
- #Update if time has passed, and some new equations generated.
158
- if elapsed > print_every_n_seconds && num_equations > 0.0
159
- # Dominating pareto curve - must be better than all simpler equations
160
- current_speed = num_equations/elapsed
161
- average_over_m_measurements = 10 #for print_every...=5, this gives 50 second running average
162
- push!(equation_speed, current_speed)
163
- if length(equation_speed) > average_over_m_measurements
164
- deleteat!(equation_speed, 1)
165
- end
166
- average_speed = sum(equation_speed)/length(equation_speed)
167
- curMSE = baselineMSE
168
- lastMSE = curMSE
169
- lastComplexity = 0
170
- if verbosity > 0
171
- @printf("\n")
172
- @printf("Cycles per second: %.3e\n", round(average_speed, sigdigits=3))
173
- cycles_elapsed = npopulations * niterations - cycles_complete
174
- @printf("Progress: %d / %d total iterations (%.3f%%)\n", cycles_elapsed, npopulations * niterations, 100.0*cycles_elapsed/(npopulations*niterations))
175
- @printf("Hall of Fame:\n")
176
- @printf("-----------------------------------------\n")
177
- @printf("%-10s %-8s %-8s %-8s\n", "Complexity", "MSE", "Score", "Equation")
178
- @printf("%-10d %-8.3e %-8.3e %-.f\n", 0, curMSE, 0f0, avgy)
179
- end
180
-
181
- for size=1:actualMaxsize
182
- if hallOfFame.exists[size]
183
- member = hallOfFame.members[size]
184
- if weighted
185
- curMSE = MSE(evalTreeArray(member.tree), y, weights)
186
- else
187
- curMSE = MSE(evalTreeArray(member.tree), y)
188
- end
189
- numberSmallerAndBetter = 0
190
- for i=1:(size-1)
191
- if weighted
192
- hofMSE = MSE(evalTreeArray(hallOfFame.members[i].tree), y, weights)
193
- else
194
- hofMSE = MSE(evalTreeArray(hallOfFame.members[i].tree), y)
195
- end
196
- if (hallOfFame.exists[size] && curMSE > hofMSE)
197
- numberSmallerAndBetter += 1
198
- end
199
- end
200
- betterThanAllSmaller = (numberSmallerAndBetter == 0)
201
- if betterThanAllSmaller
202
- delta_c = size - lastComplexity
203
- delta_l_mse = log(curMSE/lastMSE)
204
- score = convert(Float32, -delta_l_mse/delta_c)
205
- if verbosity > 0
206
- @printf("%-10d %-8.3e %-8.3e %-s\n" , size, curMSE, score, stringTree(member.tree))
207
- end
208
- lastMSE = curMSE
209
- lastComplexity = size
210
- end
211
- end
212
- end
213
- debug(verbosity, "")
214
- last_print_time = time()
215
- num_equations = 0.0
216
- end
217
- end
218
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/truth.jl DELETED
@@ -1,77 +0,0 @@
1
- # *** Custom Functions
2
- ##################################################################################################################################
3
- # *** Will somewhere need to define a list TRUTHS of all valid auxliary truths
4
- struct Transformation
5
- type::Integer # 1 is symmetry, 2 is zero, 3 is equality
6
- params::Array{Int32}
7
- Transformation(type::Integer, params::Array{Int32}) = new(type, params)
8
- Transformation(type::Integer, params::Array{Int64}) = new(type, params)
9
-
10
- end
11
- struct Truth
12
- transformation::Transformation
13
- weights::Array{Float32}
14
- Truth(transformation::Transformation, weights::Array{Float32}) = new(transformation, weights)
15
- Truth(type::Int64, params::Array{Int64}, weights::Array{Float32}) = new(Transformation(type, params), weights)
16
- Truth(transformation::Transformation, weights::Array{Float64}) = new(transformation, weights)
17
- Truth(type::Int64, params::Array{Int64}, weights::Array{Float64}) = new(Transformation(type, params), weights)
18
- end
19
- # Returns a linear combination when given X of shape nxd, y of shape nx1 is f(x) and w of shape d+2x1, result is shape nx1
20
- function LinearPrediction(cX::Array{Float32}, cy::Array{Float32}, w::Array{Float32})::Array{Float32}
21
- preds = 0
22
- for i in 1:ndims(cX)
23
- preds = preds .+ cX[:,i].*w[i]
24
- end
25
- preds = preds .+ cy.*w[ndims(cX)+1]
26
- return preds .+ w[ndims(cX)+2]
27
- end
28
-
29
- # Returns a copy of the data with the two specified columns swapped
30
- function swapColumns(cX::Array{Float32, 2}, a::Integer, b::Integer)::Array{Float32, 2}
31
- X1 = copy(cX)
32
- X1[:, a] = cX[:, b]
33
- X1[:, b] = cX[:, a]
34
- return X1
35
- end
36
-
37
- # Returns a copy of the data with the specified integers in the list set to value given
38
- function setVal(cX::Array{Float32, 2}, a::Array{Int32, 1}, val::Float32)::Array{Float32, 2}
39
- X1 = copy(cX)
40
- for i in 1:size(a)[1]
41
- X1[:, a[i]] = fill!(cX[:, a[i]], val)
42
- end
43
- return X1
44
- end
45
-
46
- # Returns a copy of the data with the specified integer indices in the list set to the first item of that list
47
- function setEq(cX::Array{Float32, 2}, a::Array{Int32, 1})::Array{Float32, 2}
48
- X1 = copy(cX)
49
- val = X1[:, a[1]]
50
- for i in 1:size(a)[1]
51
- X1[:, a[i]] = val
52
- end
53
- return X1
54
- end
55
-
56
- # Takes in a dataset and returns the transformed version of it as per the specified type and parameters
57
- function transform(cX::Array{Float32, 2}, transformation::Transformation)::Array{Float32, 2}
58
- if transformation.type==1 # then symmetry
59
- a = transformation.params[1]
60
- b = transformation.params[2]
61
- return swapColumns(cX, a, b)
62
- elseif transformation.type==2 # then zero condition
63
- return setVal(cX, transformation.params, Float32(0))
64
- elseif transformation.type == 3 # then equality condition
65
- return setEq(cX, transformation.params)
66
- else # Then error return X
67
- return cX
68
- end
69
- end
70
- function transform(cX::Array{Float32, 2}, truth::Truth)::Array{Float32, 2}
71
- return transform(cX, truth.transformation)
72
- end
73
-
74
- # Takes in X that has been transformed and returns what the Truth projects the target values should be
75
- function truthPrediction(X_transformed::Array{Float32, 2}, cy::Array{Float32}, truth::Truth)::Array{Float32}
76
- return LinearPrediction(X_transformed, cy, truth.weights)
77
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/truthPops.jl DELETED
@@ -1,170 +0,0 @@
1
- # Returns the MSE between the predictions and the truth provided targets for the given dataset
2
- function truthScore(member::PopMember, cX::Array{Float32, 2}, cy::Array{Float32}, truth::Truth)::Float32
3
- transformed = transform(cX, truth)
4
- targets = truthPrediction(transformed, cy, truth)
5
- preds = evalTreeArray(member.tree, transformed)
6
- return MSE(preds, targets)
7
- end
8
-
9
- # Assumes a dataset X, y for a given truth
10
- function truthScore(member::PopMember, truth::Truth)::Float32
11
- return truthScore(member, X, y, truth)
12
- end
13
-
14
- # Assumes a list of Truths TRUTHS is defined. Performs the truthScore function for each of them and returns the average
15
- function truthScore(member::PopMember, cX::Array{Float32, 2}, cy::Array{Float32})::Float32
16
- s = 0
17
- for truth in TRUTHS
18
- s += (truthScore(member, cX, cy, truth))/size(TRUTHS)[1]
19
- end
20
- return s
21
- end
22
-
23
- # Assumes list of Truths TRUTHS and dataset X, y are defined
24
- function truthScore(member::PopMember)::Float32
25
- return truthScore(member, X, y)
26
- end
27
- # Returns the MSE between the predictions and the truth provided targets for the given dataset
28
- function truthScore(tree::Node, cX::Array{Float32, 2}, cy::Array{Float32}, truth::Truth)::Float32
29
- transformed = transform(cX, truth)
30
- targets = truthPrediction(transformed, cy, truth)
31
- preds = evalTreeArray(tree, transformed)
32
- return MSE(preds, targets)
33
- end
34
-
35
- # Assumes a dataset X, y for a given truth
36
- function truthScore(tree::Node, truth::Truth)::Float32
37
- return truthScore(tree, X, y, truth)
38
- end
39
-
40
- # Assumes a list of Truths TRUTHS is defined. Performs the truthScore function for each of them and returns the average
41
- function truthScore(tree::Node, cX::Array{Float32, 2}, cy::Array{Float32})::Float32
42
- s = 0
43
- for truth in TRUTHS
44
- s += (truthScore(tree, cX, cy, truth))/size(TRUTHS)[1]
45
- end
46
- return s
47
- end
48
-
49
- # Assumes list of Truths TRUTHS and dataset X, y are defined
50
- function truthScore(tree::Node)::Float32
51
- return truthScore(tree, X, y)
52
- end
53
-
54
- # Returns true iff Truth Score is below a given threshold i.e truth is satisfied
55
- function testTruth(member::PopMember, truth::Truth, threshold::Float32=Float32(1.0e-8))::Bool
56
- truthError = truthScore(member, truth)
57
- #print(stringTree(member.tree), "\n")
58
- #print(truth, ": ")
59
- #print(truthError, "\n")
60
- if truthError > threshold
61
- #print("Returns False \n ----\n")
62
- return false
63
- else
64
- #print("Returns True \n ----\n")
65
- return true
66
- end
67
- end
68
-
69
- # Returns a list of violating functions from assumed list TRUTHS
70
- function violatingTruths(member::PopMember)::Array{Truth}
71
- return violatingTruths(member.tree)
72
- end
73
-
74
- # Returns true iff Truth Score is below a given threshold i.e truth is satisfied
75
- function testTruth(tree::Node, truth::Truth, threshold::Float32=Float32(1.0e-3))::Bool
76
- truthError = truthScore(tree, truth)
77
- if truthError > threshold
78
- return false
79
- else
80
- return true
81
- end
82
- end
83
-
84
- # Returns a list of violating functions from assumed list TRUTHS
85
- function violatingTruths(tree::Node)::Array{Truth}
86
- toReturn = []
87
- #print("\n Checking Equation ", stringTree(tree), "\n")
88
- for truth in TRUTHS
89
- test_truth = testTruth(tree, truth)
90
- #print("Truth: ", truth, ": " , test_truth, "\n-----\n")
91
- if !test_truth
92
- append!(toReturn, [truth])
93
- end
94
- end
95
- return toReturn
96
- end
97
-
98
- function randomIndex(cX::Array{Float32, 2}, k::Integer=10)::Array{Int32, 1}
99
- indxs = sample([Int32(i) for i in 1:size(cX)[1]], k)
100
- return indxs
101
- end
102
-
103
- function randomIndex(leng::Integer, k::Integer=10)::Array{Int32, 1}
104
- indxs = sample([Int32(i) for i in 1:leng], k)
105
- return indxs
106
- end
107
-
108
- function extendedX(cX::Array{Float32, 2}, truth::Truth, indx::Array{Int32, 1})::Array{Float32, 2}
109
- workingcX = copy(cX)
110
- X_slice = workingcX[indx, :]
111
- X_transformed = transform(X_slice, truth)
112
- return X_transformed
113
- end
114
- function extendedX(truth::Truth, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
115
- return extendedX(OGX, truth, indx)
116
- end
117
- function extendedX(cX::Array{Float32, 2}, violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
118
- if length(violatedTruths) == 0
119
- return nothing
120
- end
121
- workingX = extendedX(cX, violatedTruths[1], indx)
122
- for truth in violatedTruths[2:length(violatedTruths)]
123
- workingX = vcat(workingX, extendedX(cX, truth, indx))
124
- end
125
- return workingX
126
- end
127
- function extendedX(violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
128
- return extendedX(OGX, violatedTruths, indx)
129
- end
130
- function extendedX(tree::Node, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
131
- violatedTruths = violatingTruths(tree)
132
- return extendedX(violatedTruths, indx)
133
- end
134
- function extendedX(member::PopMember, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
135
- return extendedX(member.tree, indx)
136
- end
137
-
138
-
139
- function extendedy(cX::Array{Float32, 2}, cy::Array{Float32}, truth::Truth, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
140
- cy = copy(cy)
141
- cX = copy(cX)
142
- X_slice = cX[indx, :]
143
- y_slice = cy[indx]
144
- X_transformed = transform(X_slice, truth)
145
- y_transformed = truthPrediction(X_transformed, y_slice, truth)
146
- return y_transformed
147
- end
148
- function extendedy(truth::Truth, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
149
- return extendedy(OGX, OGy, truth, indx)
150
- end
151
- function extendedy(cX::Array{Float32, 2}, cy::Array{Float32}, violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
152
- if length(violatedTruths) == 0
153
- return nothing
154
- end
155
- workingy = extendedy(cX, cy, violatedTruths[1], indx)
156
- for truth in violatedTruths[2:length(violatedTruths)]
157
- workingy = vcat(workingy, extendedy(cX, cy, truth, indx))
158
- end
159
- return workingy
160
- end
161
- function extendedy(violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
162
- return extendedy(OGX,OGy, violatedTruths, indx)
163
- end
164
- function extendedy(tree::Node, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
165
- violatedTruths = violatingTruths(tree)
166
- return extendedy(violatedTruths, indx)
167
- end
168
- function extendedy(member::PopMember, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
169
- return extendedy(member.tree, indx)
170
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pysr/sr.py CHANGED
@@ -100,13 +100,13 @@ def pysr(X=None, y=None, weights=None,
100
  useFrequency=False,
101
  tempdir=None,
102
  delete_tempfiles=True,
103
- limitPowComplexity=False, #deprecated
104
- threads=None, #deprecated
105
  julia_optimization=3,
 
 
106
  ):
107
  """Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
108
  Note: most default parameters have been tuned over several example
109
- equations, but you should adjust `threads`, `niterations`,
110
  `binary_operators`, `unary_operators` to your requirements.
111
 
112
  :param X: np.ndarray or pandas.DataFrame, 2D array. Rows are examples,
@@ -188,12 +188,16 @@ def pysr(X=None, y=None, weights=None,
188
  :param julia_optimization: int, Optimization level (0, 1, 2, 3)
189
  :param tempdir: str or None, directory for the temporary files
190
  :param delete_tempfiles: bool, whether to delete the temporary files after finishing
 
 
 
 
 
 
191
  :returns: pd.DataFrame, Results dataframe, giving complexity, MSE, and equations
192
  (as strings).
193
 
194
  """
195
- _raise_depreciation_errors(limitPowComplexity, threads)
196
-
197
  if isinstance(X, pd.DataFrame):
198
  variable_names = list(X.columns)
199
  X = np.array(X)
@@ -239,8 +243,7 @@ def pysr(X=None, y=None, weights=None,
239
  topn=topn, verbosity=verbosity,
240
  julia_optimization=julia_optimization, timeout=timeout,
241
  fractionReplacedHof=fractionReplacedHof,
242
- hofMigration=hofMigration,
243
- limitPowComplexity=limitPowComplexity, maxdepth=maxdepth,
244
  maxsize=maxsize, migration=migration, nrestarts=nrestarts,
245
  parsimony=parsimony, perturbationFactor=perturbationFactor,
246
  populations=populations, procs=procs,
@@ -257,17 +260,24 @@ def pysr(X=None, y=None, weights=None,
257
  weightRandomize=weightRandomize,
258
  weightSimplify=weightSimplify,
259
  constraints=constraints,
260
- extra_sympy_mappings=extra_sympy_mappings)
 
261
 
262
  kwargs = {**_set_paths(tempdir), **kwargs}
263
 
264
- kwargs['def_hyperparams'] = _metaprogram_fast_operator(**kwargs)
 
 
 
 
 
 
 
265
 
266
  _handle_constraints(**kwargs)
267
 
268
  kwargs['constraints_str'] = _make_constraints_str(**kwargs)
269
  kwargs['def_hyperparams'] = _make_hyperparams_julia_str(**kwargs)
270
- kwargs['def_auxiliary'] = _make_auxiliary_julia_str(**kwargs)
271
  kwargs['def_datasets'] = _make_datasets_julia_str(**kwargs)
272
 
273
  _create_julia_files(**kwargs)
@@ -280,13 +290,6 @@ def pysr(X=None, y=None, weights=None,
280
  return get_hof(**kwargs)
281
 
282
 
283
- def _make_auxiliary_julia_str(julia_auxiliary_filenames, **kwargs):
284
- def_auxiliary = '\n'.join([
285
- f"""include("{_escape_filename(aux_fname)}")""" for aux_fname in julia_auxiliary_filenames
286
- ])
287
- return def_auxiliary
288
-
289
-
290
  def _set_globals(X, equation_file, extra_sympy_mappings, variable_names, **kwargs):
291
  global global_n_features
292
  global global_equation_file
@@ -298,14 +301,16 @@ def _set_globals(X, equation_file, extra_sympy_mappings, variable_names, **kwarg
298
  global_extra_sympy_mappings = extra_sympy_mappings
299
 
300
 
301
- def _final_pysr_process(julia_optimization, procs, runfile_filename, timeout, **kwargs):
302
  command = [
303
  f'julia', f'-O{julia_optimization:d}',
304
- f'-p', f'{procs}',
305
  str(runfile_filename),
306
  ]
307
  if timeout is not None:
308
  command = [f'timeout', f'{timeout}'] + command
 
 
 
309
  print("Running on", ' '.join(command))
310
  process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1)
311
  try:
@@ -320,73 +325,108 @@ def _final_pysr_process(julia_optimization, procs, runfile_filename, timeout, **
320
  print("Killing process... will return when done.")
321
  process.kill()
322
 
323
-
324
- def _create_julia_files(auxiliary_filename, dataset_filename, def_auxiliary, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
325
- ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity, **kwargs):
 
326
  with open(hyperparam_filename, 'w') as f:
327
  print(def_hyperparams, file=f)
328
  with open(dataset_filename, 'w') as f:
329
  print(def_datasets, file=f)
330
- with open(auxiliary_filename, 'w') as f:
331
- print(def_auxiliary, file=f)
332
  with open(runfile_filename, 'w') as f:
333
- print(f'@everywhere include("{_escape_filename(hyperparam_filename)}")', file=f)
334
- print(f'@everywhere include("{_escape_filename(dataset_filename)}")', file=f)
335
- print(f'@everywhere include("{_escape_filename(auxiliary_filename)}")', file=f)
336
- print(f'@everywhere include("{_escape_filename(pkg_filename)}")', file=f)
337
- print(
338
- f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})',
339
- file=f)
340
- print(f'rmprocs(nprocs)', file=f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
 
342
 
343
  def _make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename, **kwargs):
344
  def_datasets = """using DelimitedFiles"""
345
  np.savetxt(X_filename, X, delimiter=',')
346
- np.savetxt(y_filename, y, delimiter=',')
347
  if weights is not None:
348
- np.savetxt(weights_filename, weights, delimiter=',')
349
  def_datasets += f"""
350
- const X = readdlm("{_escape_filename(X_filename)}", ',', Float32, '\\n')
351
- const y = readdlm("{_escape_filename(y_filename)}", ',', Float32, '\\n')"""
352
  if weights is not None:
353
  def_datasets += f"""
354
- const weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')"""
355
  return def_datasets
356
 
357
-
358
  def _make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators, constraints_str,
359
  def_hyperparams, equation_file, fast_cycle, fractionReplacedHof, hofMigration,
360
- limitPowComplexity, maxdepth, maxsize, migration, nrestarts, operator_filename,
361
  parsimony, perturbationFactor, populations, procs, shouldOptimizeConstants,
362
- unary_operators, useFrequency, use_custom_variable_names, variable_names, warmupMaxsize, weightAddNode,
 
 
363
  weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
364
  weightMutateOperator, weightRandomize, weightSimplify, weights, **kwargs):
365
- def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  {constraints_str}
367
- const binops = {'[' + ', '.join(binary_operators) + ']'}
368
- const unaops = {'[' + ', '.join(unary_operators) + ']'}
369
- const ns=10;
370
- const parsimony = {parsimony:f}f0
371
- const alpha = {alpha:f}f0
372
- const maxsize = {maxsize:d}
373
- const maxdepth = {maxdepth:d}
374
- const fast_cycle = {'true' if fast_cycle else 'false'}
375
- const migration = {'true' if migration else 'false'}
376
- const hofMigration = {'true' if hofMigration else 'false'}
377
- const fractionReplacedHof = {fractionReplacedHof}f0
378
- const shouldOptimizeConstants = {'true' if shouldOptimizeConstants else 'false'}
379
- const hofFile = "{equation_file}"
380
- const nprocs = {procs:d}
381
- const npopulations = {populations:d}
382
- const nrestarts = {nrestarts:d}
383
- const perturbationFactor = {perturbationFactor:f}f0
384
- const annealing = {"true" if annealing else "false"}
385
- const weighted = {"true" if weights is not None else "false"}
386
- const batching = {"true" if batching else "false"}
387
- const batchSize = {min([batchSize, len(X)]) if batching else len(X):d}
388
- const useVarMap = {"true" if use_custom_variable_names else "false"}
389
- const mutationWeights = [
390
  {weightMutateConstant:f},
391
  {weightMutateOperator:f},
392
  {weightAddNode:f},
@@ -395,53 +435,25 @@ const mutationWeights = [
395
  {weightSimplify:f},
396
  {weightRandomize:f},
397
  {weightDoNothing:f}
398
- ]
399
- const warmupMaxsize = {warmupMaxsize:d}
400
- const limitPowComplexity = {"true" if limitPowComplexity else "false"}
401
- const useFrequency = {"true" if useFrequency else "false"}
 
 
 
 
402
  """
403
- op_runner = ""
404
- if len(binary_operators) > 0:
405
- op_runner += """
406
- @inline function BINOP!(x::Array{Float32, 1}, y::Array{Float32, 1}, i::Int, clen::Int)
407
- if i === 1
408
- @inbounds @simd for j=1:clen
409
- x[j] = """f"{binary_operators[0]}""""(x[j], y[j])
410
- end"""
411
- for i in range(1, len(binary_operators)):
412
- op_runner += f"""
413
- elseif i === {i + 1}
414
- @inbounds @simd for j=1:clen
415
- x[j] = {binary_operators[i]}(x[j], y[j])
416
- end"""
417
- op_runner += """
418
- end
419
- end"""
420
- if len(unary_operators) > 0:
421
- op_runner += """
422
- @inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
423
- if i === 1
424
- @inbounds @simd for j=1:clen
425
- x[j] = """f"{unary_operators[0]}(x[j])""""
426
- end"""
427
- for i in range(1, len(unary_operators)):
428
- op_runner += f"""
429
- elseif i === {i + 1}
430
- @inbounds @simd for j=1:clen
431
- x[j] = {unary_operators[i]}(x[j])
432
- end"""
433
- op_runner += """
434
- end
435
- end"""
436
- def_hyperparams += op_runner
437
  if use_custom_variable_names:
438
- def_hyperparams += f"""
439
- const varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
 
440
  return def_hyperparams
441
 
442
 
443
  def _make_constraints_str(binary_operators, constraints, unary_operators, **kwargs):
444
- constraints_str = "const una_constraints = ["
445
  first = True
446
  for op in unary_operators:
447
  val = constraints[op]
@@ -449,8 +461,8 @@ def _make_constraints_str(binary_operators, constraints, unary_operators, **kwar
449
  constraints_str += ", "
450
  constraints_str += f"{val:d}"
451
  first = False
452
- constraints_str += """]
453
- const bin_constraints = ["""
454
  first = True
455
  for op in binary_operators:
456
  tup = constraints[op]
@@ -458,7 +470,7 @@ const bin_constraints = ["""
458
  constraints_str += ", "
459
  constraints_str += f"({tup[0]:d}, {tup[1]:d})"
460
  first = False
461
- constraints_str += "]"
462
  return constraints_str
463
 
464
 
@@ -481,7 +493,7 @@ def _handle_constraints(binary_operators, constraints, unary_operators, **kwargs
481
  constraints[op][0], constraints[op][1] = constraints[op][1], constraints[op][0]
482
 
483
 
484
- def _metaprogram_fast_operator(binary_operators, unary_operators, **kwargs):
485
  def_hyperparams = ""
486
  for op_list in [binary_operators, unary_operators]:
487
  for i in range(len(op_list)):
@@ -529,35 +541,20 @@ def _handle_feature_selection(X, select_k_features, use_custom_variable_names, v
529
 
530
  def _set_paths(tempdir):
531
  # System-independent paths
532
- pkg_directory = Path(__file__).parents[1] / 'julia'
533
- pkg_filename = pkg_directory / "sr.jl"
534
- operator_filename = pkg_directory / "Operators.jl"
535
- julia_auxiliaries = [
536
- "Equation.jl", "ProgramConstants.jl",
537
- "LossFunctions.jl", "Utils.jl", "EvaluateEquation.jl",
538
- "MutationFunctions.jl", "SimplifyEquation.jl", "PopMember.jl",
539
- "HallOfFame.jl", "CheckConstraints.jl", "Mutate.jl",
540
- "Population.jl", "RegularizedEvolution.jl", "SingleIteration.jl",
541
- "ConstantOptimization.jl"
542
- ]
543
- julia_auxiliary_filenames = [
544
- pkg_directory / fname
545
- for fname in julia_auxiliaries
546
- ]
547
-
548
  tmpdir = Path(tempfile.mkdtemp(dir=tempdir))
549
  hyperparam_filename = tmpdir / f'hyperparams.jl'
550
  dataset_filename = tmpdir / f'dataset.jl'
551
- auxiliary_filename = tmpdir / f'auxiliary.jl'
552
  runfile_filename = tmpdir / f'runfile.jl'
553
  X_filename = tmpdir / "X.csv"
554
  y_filename = tmpdir / "y.csv"
555
  weights_filename = tmpdir / "weights.csv"
556
- return dict(auxiliary_filename=auxiliary_filename, X_filename=X_filename,
 
 
557
  dataset_filename=dataset_filename,
558
  hyperparam_filename=hyperparam_filename,
559
- julia_auxiliary_filenames=julia_auxiliary_filenames,
560
- operator_filename=operator_filename, pkg_filename=pkg_filename,
561
  runfile_filename=runfile_filename, tmpdir=tmpdir,
562
  weights_filename=weights_filename, y_filename=y_filename)
563
 
@@ -575,13 +572,6 @@ def _check_assertions(X, binary_operators, unary_operators, use_custom_variable_
575
  assert len(variable_names) == X.shape[1]
576
 
577
 
578
- def _raise_depreciation_errors(limitPowComplexity, threads):
579
- if threads is not None:
580
- raise ValueError("The threads kwarg is deprecated. Use procs.")
581
- if limitPowComplexity:
582
- raise ValueError("The limitPowComplexity kwarg is deprecated. Use constraints.")
583
-
584
-
585
  def run_feature_selection(X, y, select_k_features):
586
  """Use a gradient boosting tree regressor as a proxy for finding
587
  the k most important features in X, returning indices for those
@@ -695,3 +685,15 @@ def _escape_filename(filename):
695
  repr = str(filename)
696
  repr = repr.replace('\\', '\\\\')
697
  return repr
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  useFrequency=False,
101
  tempdir=None,
102
  delete_tempfiles=True,
 
 
103
  julia_optimization=3,
104
+ julia_project=None,
105
+ user_input=True
106
  ):
107
  """Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
108
  Note: most default parameters have been tuned over several example
109
+ equations, but you should adjust `niterations`,
110
  `binary_operators`, `unary_operators` to your requirements.
111
 
112
  :param X: np.ndarray or pandas.DataFrame, 2D array. Rows are examples,
 
188
  :param julia_optimization: int, Optimization level (0, 1, 2, 3)
189
  :param tempdir: str or None, directory for the temporary files
190
  :param delete_tempfiles: bool, whether to delete the temporary files after finishing
191
+ :param julia_project: str or None, a Julia environment location containing
192
+ a Project.toml (and potentially the source code for SymbolicRegression.jl).
193
+ Default gives the Python package directory, where a Project.toml file
194
+ should be present from the install.
195
+ :param user_input: Whether to ask for user input or not for installing (to
196
+ be used for automated scripts). Will choose to install when asked.
197
  :returns: pd.DataFrame, Results dataframe, giving complexity, MSE, and equations
198
  (as strings).
199
 
200
  """
 
 
201
  if isinstance(X, pd.DataFrame):
202
  variable_names = list(X.columns)
203
  X = np.array(X)
 
243
  topn=topn, verbosity=verbosity,
244
  julia_optimization=julia_optimization, timeout=timeout,
245
  fractionReplacedHof=fractionReplacedHof,
246
+ hofMigration=hofMigration, maxdepth=maxdepth,
 
247
  maxsize=maxsize, migration=migration, nrestarts=nrestarts,
248
  parsimony=parsimony, perturbationFactor=perturbationFactor,
249
  populations=populations, procs=procs,
 
260
  weightRandomize=weightRandomize,
261
  weightSimplify=weightSimplify,
262
  constraints=constraints,
263
+ extra_sympy_mappings=extra_sympy_mappings,
264
+ julia_project=julia_project)
265
 
266
  kwargs = {**_set_paths(tempdir), **kwargs}
267
 
268
+ pkg_directory = kwargs['pkg_directory']
269
+ kwargs['need_install'] = False
270
+ if not (pkg_directory / 'Manifest.toml').is_file():
271
+ kwargs['need_install'] = (not user_input) or _yesno("I will install Julia packages using PySR's Project.toml file. OK?")
272
+ if kwargs['need_install']:
273
+ print("OK. I will install at launch.")
274
+
275
+ kwargs['def_hyperparams'] = _create_inline_operators(**kwargs)
276
 
277
  _handle_constraints(**kwargs)
278
 
279
  kwargs['constraints_str'] = _make_constraints_str(**kwargs)
280
  kwargs['def_hyperparams'] = _make_hyperparams_julia_str(**kwargs)
 
281
  kwargs['def_datasets'] = _make_datasets_julia_str(**kwargs)
282
 
283
  _create_julia_files(**kwargs)
 
290
  return get_hof(**kwargs)
291
 
292
 
 
 
 
 
 
 
 
293
  def _set_globals(X, equation_file, extra_sympy_mappings, variable_names, **kwargs):
294
  global global_n_features
295
  global global_equation_file
 
301
  global_extra_sympy_mappings = extra_sympy_mappings
302
 
303
 
304
+ def _final_pysr_process(julia_optimization, runfile_filename, timeout, **kwargs):
305
  command = [
306
  f'julia', f'-O{julia_optimization:d}',
 
307
  str(runfile_filename),
308
  ]
309
  if timeout is not None:
310
  command = [f'timeout', f'{timeout}'] + command
311
+ _cmd_runner(command)
312
+
313
+ def _cmd_runner(command):
314
  print("Running on", ' '.join(command))
315
  process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1)
316
  try:
 
325
  print("Killing process... will return when done.")
326
  process.kill()
327
 
328
+ def _create_julia_files(dataset_filename, def_datasets, hyperparam_filename, def_hyperparams,
329
+ fractionReplaced, ncyclesperiteration, niterations, npop,
330
+ runfile_filename, topn, verbosity, julia_project, procs, weights,
331
+ X, variable_names, pkg_directory, need_install, **kwargs):
332
  with open(hyperparam_filename, 'w') as f:
333
  print(def_hyperparams, file=f)
334
  with open(dataset_filename, 'w') as f:
335
  print(def_datasets, file=f)
 
 
336
  with open(runfile_filename, 'w') as f:
337
+ if julia_project is None:
338
+ julia_project = pkg_directory
339
+ else:
340
+ julia_project = Path(julia_project)
341
+ print(f'import Pkg', file=f)
342
+ print(f'Pkg.activate("{_escape_filename(julia_project)}")', file=f)
343
+ if need_install:
344
+ print(f'Pkg.add("SymbolicRegression")', file=f)
345
+ print(f'Pkg.instantiate()', file=f)
346
+ print(f'Pkg.precompile()', file=f)
347
+ print(f'using SymbolicRegression', file=f)
348
+ print(f'include("{_escape_filename(hyperparam_filename)}")', file=f)
349
+ print(f'include("{_escape_filename(dataset_filename)}")', file=f)
350
+ if len(variable_names) == 0:
351
+ varMap = "[" + ",".join([f'"x{i}"' for i in range(X.shape[1])]) + "]"
352
+ else:
353
+ varMap = "[" + ",".join(variable_names) + "]"
354
+
355
+ if weights is not None:
356
+ print(f'EquationSearch(X, y, weights=weights, niterations={niterations:d}, varMap={varMap}, options=options, numprocs={procs})', file=f)
357
+ else:
358
+ print(f'EquationSearch(X, y, niterations={niterations:d}, varMap={varMap}, options=options, numprocs={procs})', file=f)
359
 
360
 
361
  def _make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename, **kwargs):
362
  def_datasets = """using DelimitedFiles"""
363
  np.savetxt(X_filename, X, delimiter=',')
364
+ np.savetxt(y_filename, y.reshape(-1, 1), delimiter=',')
365
  if weights is not None:
366
+ np.savetxt(weights_filename, weights.reshape(-1, 1), delimiter=',')
367
  def_datasets += f"""
368
+ X = copy(transpose(readdlm("{_escape_filename(X_filename)}", ',', Float32, '\\n')))
369
+ y = readdlm("{_escape_filename(y_filename)}", ',', Float32, '\\n')[:, 1]"""
370
  if weights is not None:
371
  def_datasets += f"""
372
+ weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')[:, 1]"""
373
  return def_datasets
374
 
 
375
  def _make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators, constraints_str,
376
  def_hyperparams, equation_file, fast_cycle, fractionReplacedHof, hofMigration,
377
+ maxdepth, maxsize, migration, nrestarts, npop,
378
  parsimony, perturbationFactor, populations, procs, shouldOptimizeConstants,
379
+ unary_operators, useFrequency, use_custom_variable_names,
380
+ variable_names, warmupMaxsize, weightAddNode,
381
+ ncyclesperiteration, fractionReplaced, topn, verbosity,
382
  weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
383
  weightMutateOperator, weightRandomize, weightSimplify, weights, **kwargs):
384
+ def tuple_fix(ops):
385
+ if len(ops) > 1:
386
+ return ', '.join(ops)
387
+ elif len(ops) == 0:
388
+ return ''
389
+ else:
390
+ return ops[0] + ','
391
+
392
+ def_hyperparams += f"""\n
393
+ plus=(+)
394
+ sub=(-)
395
+ mult=(*)
396
+ square=SymbolicRegression.square
397
+ cube=SymbolicRegression.cube
398
+ pow=(^)
399
+ div=(/)
400
+ logm=SymbolicRegression.logm
401
+ logm2=SymbolicRegression.logm2
402
+ logm10=SymbolicRegression.logm10
403
+ sqrtm=SymbolicRegression.sqrtm
404
+ neg=SymbolicRegression.neg
405
+ greater=SymbolicRegression.greater
406
+ relu=SymbolicRegression.relu
407
+ logical_or=SymbolicRegression.logical_or
408
+ logical_and=SymbolicRegression.logical_and
409
+
410
+ options = SymbolicRegression.Options(binary_operators={'(' + tuple_fix(binary_operators) + ')'},
411
+ unary_operators={'(' + tuple_fix(unary_operators) + ')'},
412
  {constraints_str}
413
+ parsimony={parsimony:f}f0,
414
+ alpha={alpha:f}f0,
415
+ maxsize={maxsize:d},
416
+ maxdepth={maxdepth:d},
417
+ fast_cycle={'true' if fast_cycle else 'false'},
418
+ migration={'true' if migration else 'false'},
419
+ hofMigration={'true' if hofMigration else 'false'},
420
+ fractionReplacedHof={fractionReplacedHof}f0,
421
+ shouldOptimizeConstants={'true' if shouldOptimizeConstants else 'false'},
422
+ hofFile="{equation_file}",
423
+ npopulations={populations:d},
424
+ nrestarts={nrestarts:d},
425
+ perturbationFactor={perturbationFactor:f}f0,
426
+ annealing={"true" if annealing else "false"},
427
+ batching={"true" if batching else "false"},
428
+ batchSize={min([batchSize, len(X)]) if batching else len(X):d},
429
+ mutationWeights=[
 
 
 
 
 
 
430
  {weightMutateConstant:f},
431
  {weightMutateOperator:f},
432
  {weightAddNode:f},
 
435
  {weightSimplify:f},
436
  {weightRandomize:f},
437
  {weightDoNothing:f}
438
+ ],
439
+ warmupMaxsize={warmupMaxsize:d},
440
+ useFrequency={"true" if useFrequency else "false"},
441
+ npop={npop:d},
442
+ ncyclesperiteration={ncyclesperiteration:d},
443
+ fractionReplaced={fractionReplaced:f}f0,
444
+ topn={topn:d},
445
+ verbosity=round(Int32, {verbosity:f})
446
  """
447
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  if use_custom_variable_names:
449
+ def_hyperparams += f""",
450
+ varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
451
+ def_hyperparams += '\n)'
452
  return def_hyperparams
453
 
454
 
455
  def _make_constraints_str(binary_operators, constraints, unary_operators, **kwargs):
456
+ constraints_str = "una_constraints = ["
457
  first = True
458
  for op in unary_operators:
459
  val = constraints[op]
 
461
  constraints_str += ", "
462
  constraints_str += f"{val:d}"
463
  first = False
464
+ constraints_str += """],
465
+ bin_constraints = ["""
466
  first = True
467
  for op in binary_operators:
468
  tup = constraints[op]
 
470
  constraints_str += ", "
471
  constraints_str += f"({tup[0]:d}, {tup[1]:d})"
472
  first = False
473
+ constraints_str += "],"
474
  return constraints_str
475
 
476
 
 
493
  constraints[op][0], constraints[op][1] = constraints[op][1], constraints[op][0]
494
 
495
 
496
+ def _create_inline_operators(binary_operators, unary_operators, **kwargs):
497
  def_hyperparams = ""
498
  for op_list in [binary_operators, unary_operators]:
499
  for i in range(len(op_list)):
 
541
 
542
  def _set_paths(tempdir):
543
  # System-independent paths
544
+ pkg_directory = Path(__file__).parents[1]
545
+ default_project_file = pkg_directory / "Project.toml"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
546
  tmpdir = Path(tempfile.mkdtemp(dir=tempdir))
547
  hyperparam_filename = tmpdir / f'hyperparams.jl'
548
  dataset_filename = tmpdir / f'dataset.jl'
 
549
  runfile_filename = tmpdir / f'runfile.jl'
550
  X_filename = tmpdir / "X.csv"
551
  y_filename = tmpdir / "y.csv"
552
  weights_filename = tmpdir / "weights.csv"
553
+ return dict(pkg_directory=pkg_directory,
554
+ default_project_file=default_project_file,
555
+ X_filename=X_filename,
556
  dataset_filename=dataset_filename,
557
  hyperparam_filename=hyperparam_filename,
 
 
558
  runfile_filename=runfile_filename, tmpdir=tmpdir,
559
  weights_filename=weights_filename, y_filename=y_filename)
560
 
 
572
  assert len(variable_names) == X.shape[1]
573
 
574
 
 
 
 
 
 
 
 
575
  def run_feature_selection(X, y, select_k_features):
576
  """Use a gradient boosting tree regressor as a proxy for finding
577
  the k most important features in X, returning indices for those
 
685
  repr = str(filename)
686
  repr = repr.replace('\\', '\\\\')
687
  return repr
688
+
689
+ # https://gist.github.com/garrettdreyfus/8153571
690
+ def _yesno(question):
691
+ """Simple Yes/No Function."""
692
+ prompt = f'{question} (y/n): '
693
+ ans = input(prompt).strip().lower()
694
+ if ans not in ['y', 'n']:
695
+ print(f'{ans} is invalid, please try again...')
696
+ return _yesno(question)
697
+ if ans == 'y':
698
+ return True
699
+ return False
setup.py CHANGED
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
5
 
6
  setuptools.setup(
7
  name="pysr", # Replace with your own username
8
- version="0.3.37",
9
  author="Miles Cranmer",
10
  author_email="[email protected]",
11
  description="Simple and efficient symbolic regression",
@@ -19,7 +19,7 @@ setuptools.setup(
19
  ],
20
  packages=setuptools.find_packages(),
21
  package_data={
22
- 'pysr': ['../julia/*.jl']
23
  },
24
  include_package_data=False,
25
  classifiers=[
 
5
 
6
  setuptools.setup(
7
  name="pysr", # Replace with your own username
8
+ version="0.4.0",
9
  author="Miles Cranmer",
10
  author_email="[email protected]",
11
  description="Simple and efficient symbolic regression",
 
19
  ],
20
  packages=setuptools.find_packages(),
21
  package_data={
22
+ 'pysr': ['../Project.toml']
23
  },
24
  include_package_data=False,
25
  classifiers=[
test/test.py CHANGED
@@ -7,16 +7,16 @@ y = X[:, 0]
7
  equations = pysr(X, y,
8
  niterations=10)
9
  print(equations)
10
- assert equations.iloc[-1]['MSE'] < 1e-10
11
 
12
  print("Test 2 - test custom operator")
13
  y = X[:, 0]**2
14
  equations = pysr(X, y,
15
- unary_operators=["square(x) = x^2"], binary_operators=["plus"],
16
  extra_sympy_mappings={'square': lambda x: x**2},
17
  niterations=10)
18
  print(equations)
19
- assert equations.iloc[-1]['MSE'] < 1e-10
20
 
21
  X = np.random.randn(100, 1)
22
  y = X[:, 0] + 3.0
@@ -26,4 +26,4 @@ equations = pysr(X, y,
26
  niterations=10)
27
 
28
  print(equations)
29
- assert equations.iloc[-1]['MSE'] < 1e-10
 
7
  equations = pysr(X, y,
8
  niterations=10)
9
  print(equations)
10
+ assert equations.iloc[-1]['MSE'] < 1e-4
11
 
12
  print("Test 2 - test custom operator")
13
  y = X[:, 0]**2
14
  equations = pysr(X, y,
15
+ unary_operators=["sq(x) = x^2"], binary_operators=["plus"],
16
  extra_sympy_mappings={'square': lambda x: x**2},
17
  niterations=10)
18
  print(equations)
19
+ assert equations.iloc[-1]['MSE'] < 1e-4
20
 
21
  X = np.random.randn(100, 1)
22
  y = X[:, 0] + 3.0
 
26
  niterations=10)
27
 
28
  print(equations)
29
+ assert equations.iloc[-1]['MSE'] < 1e-4