MilesCranmer committed
Merge pull request #26 from MilesCranmer/separate-packages
Files changed:
- .travis.yml +1 -1
- Project.toml +6 -0
- README.md +5 -12
- TODO.md +2 -0
- julia/CheckConstraints.jl +0 -42
- julia/ConstantOptimization.jl +0 -49
- julia/Equation.jl +0 -173
- julia/EvaluateEquation.jl +0 -47
- julia/LossFunctions.jl +0 -82
- julia/Mutate.jl +0 -124
- julia/MutationFunctions.jl +0 -239
- julia/Operators.jl +0 -56
- julia/PopMember.jl +0 -10
- julia/Population.jl +0 -40
- julia/ProgramConstants.jl +0 -9
- julia/RegularizedEvolution.jl +0 -46
- julia/SimplifyEquation.jl +0 -106
- julia/SingleIteration.jl +0 -28
- julia/Utils.jl +0 -34
- julia/halloffame.jl +0 -8
- julia/sr.jl +0 -218
- julia/truth.jl +0 -77
- julia/truthPops.jl +0 -170
- pysr/sr.py +138 -136
- setup.py +2 -2
- test/test.py +4 -4
.travis.yml
CHANGED
@@ -19,7 +19,7 @@ jobs:
 install: pip3 install --upgrade pip

 before_script:
-  - julia --color=yes -e 'using Pkg; pkg"add
+  - julia --color=yes -e 'using Pkg; pkg"add SymbolicRegression; precompile;"'

 script:
   - pip3 install numpy pandas
Project.toml
ADDED
@@ -0,0 +1,6 @@
+[deps]
+SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
+
+[compat]
+SymbolicRegression = "0.4"
+julia = "1"
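For reference, the new Project.toml can be resolved with Julia's standard package manager. The sketch below uses only stock Pkg calls and is not part of the diff itself:

```julia
# Minimal sketch: resolve the dependency declared above on a local checkout.
using Pkg
Pkg.activate(".")    # use the Project.toml at the repository root
Pkg.instantiate()    # installs a SymbolicRegression version satisfying the "0.4" compat bound
Pkg.precompile()     # optional, mirrors the precompile step in .travis.yml
```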
README.md
CHANGED
@@ -14,7 +14,7 @@ Uses regularized evolution, simulated annealing, and gradient-free optimization.
 [Documentation](https://pysr.readthedocs.io/en/latest)

 Check out [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl) for
-the pure-Julia
+the pure-Julia backend of this package.

 Symbolic regression is a very interpretable machine learning algorithm
 for low-dimensional problems: these tools search equation space
@@ -51,22 +51,15 @@ Install Julia - see [downloads](https://julialang.org/downloads/), and
 then instructions for [mac](https://julialang.org/downloads/platform/#macos)
 and [linux](https://julialang.org/downloads/platform/#linux_and_freebsd).
 (Don't use the `conda-forge` version; it doesn't seem to work properly.)
-Then, at the command line,
-install and precompile the `Optim` and `SpecialFunctions`
-packages via:
-
-```bash
-julia -e 'using Pkg; pkg"add Optim; add SpecialFunctions; precompile;"'
-```
-
-For python, you need to have Python 3, numpy, sympy, and pandas installed.
-
-You can install this package from PyPI with:

+You can install PySR with:
 ```bash
 pip install pysr
 ```

+The first launch will automatically install the Julia packages
+required.
+
 # Quickstart

 Here is some demo code (also found in `example.py`)
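Since the README now points to SymbolicRegression.jl as the pure-Julia backend, a rough sketch of calling that backend directly is shown below. The `Options`/`EquationSearch` names and keyword arguments follow the SymbolicRegression.jl documentation of that era and are an assumption here, not part of this diff:

```julia
# Hedged sketch of driving the new backend directly from Julia (assumed API).
using SymbolicRegression

X = randn(Float32, 5, 100)                 # features are rows in SymbolicRegression.jl
y = 2f0 .* cos.(X[4, :]) .+ X[1, :] .^ 2   # toy target

options = Options(binary_operators=(+, *, /, -), unary_operators=(cos,))
hallOfFame = EquationSearch(X, y, niterations=5, options=options)
```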
TODO.md
CHANGED
@@ -62,6 +62,8 @@
 - [x] Sympy printing
 - [ ] Sort these todo lists by priority

+- [ ] Automatically convert log, log10, log2, pow to the correct operators.
+
 ## Feature ideas

 - [ ] Other default losses (e.g., abs, other likelihoods, or just allow user to pass this as a string).
julia/CheckConstraints.jl
DELETED
@@ -1,42 +0,0 @@
-# Check if any binary operator are overly complex
-function flagBinOperatorComplexity(tree::Node, op::Int)::Bool
-    if tree.degree == 0
-        return false
-    elseif tree.degree == 1
-        return flagBinOperatorComplexity(tree.l, op)
-    else
-        if tree.op == op
-            overly_complex = (
-                ((bin_constraints[op][1] > -1) &&
-                 (countNodes(tree.l) > bin_constraints[op][1]))
-                ||
-                ((bin_constraints[op][2] > -1) &&
-                 (countNodes(tree.r) > bin_constraints[op][2]))
-            )
-            if overly_complex
-                return true
-            end
-        end
-        return (flagBinOperatorComplexity(tree.l, op) || flagBinOperatorComplexity(tree.r, op))
-    end
-end
-
-# Check if any unary operators are overly complex
-function flagUnaOperatorComplexity(tree::Node, op::Int)::Bool
-    if tree.degree == 0
-        return false
-    elseif tree.degree == 1
-        if tree.op == op
-            overly_complex = (
-                (una_constraints[op] > -1) &&
-                (countNodes(tree.l) > una_constraints[op])
-            )
-            if overly_complex
-                return true
-            end
-        end
-        return flagUnaOperatorComplexity(tree.l, op)
-    else
-        return (flagUnaOperatorComplexity(tree.l, op) || flagUnaOperatorComplexity(tree.r, op))
-    end
-end
julia/ConstantOptimization.jl
DELETED
@@ -1,49 +0,0 @@
-import Optim
-
-# Proxy function for optimization
-function optFunc(x::Array{Float32, 1}, tree::Node)::Float32
-    setConstants(tree, x)
-    return scoreFunc(tree)
-end
-
-# Use Nelder-Mead to optimize the constants in an equation
-function optimizeConstants(member::PopMember)::PopMember
-    nconst = countConstants(member.tree)
-    if nconst == 0
-        return member
-    end
-    x0 = getConstants(member.tree)
-    f(x::Array{Float32,1})::Float32 = optFunc(x, member.tree)
-    if size(x0)[1] == 1
-        algorithm = Optim.Newton
-    else
-        algorithm = Optim.NelderMead
-    end
-
-    try
-        result = Optim.optimize(f, x0, algorithm(), Optim.Options(iterations=100))
-        # Try other initial conditions:
-        for i=1:nrestarts
-            tmpresult = Optim.optimize(f, x0 .* (1f0 .+ 5f-1*randn(Float32, size(x0)[1])), algorithm(), Optim.Options(iterations=100))
-            if tmpresult.minimum < result.minimum
-                result = tmpresult
-            end
-        end
-
-        if Optim.converged(result)
-            setConstants(member.tree, result.minimizer)
-            member.score = convert(Float32, result.minimum)
-            member.birth = getTime()
-        else
-            setConstants(member.tree, x0)
-        end
-    catch error
-        # Fine if optimization encountered domain error, just return x0
-        if isa(error, AssertionError)
-            setConstants(member.tree, x0)
-        else
-            throw(error)
-        end
-    end
-    return member
-end
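The constant optimizer above leans on Optim.jl. The standalone sketch below shows just the `Optim.optimize` restart pattern used there; the toy objective and the three restarts are made up for illustration:

```julia
# Nelder-Mead restart pattern from optimizeConstants, on a toy objective.
import Optim

f(x) = (x[1] - 3.0)^2 + (x[2] + 1.0)^2    # stand-in for the tree's loss
x0 = [0.0, 0.0]
result = Optim.optimize(f, x0, Optim.NelderMead(), Optim.Options(iterations=100))
for _ in 1:3                              # a few perturbed restarts, as above
    tmp = Optim.optimize(f, x0 .+ 0.5 .* randn(2), Optim.NelderMead(), Optim.Options(iterations=100))
    if tmp.minimum < result.minimum
        result = tmp
    end
end
Optim.converged(result) && println(Optim.minimizer(result))   # ≈ [3.0, -1.0]
```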
julia/Equation.jl
DELETED
@@ -1,173 +0,0 @@
-# Define a serialization format for the symbolic equations:
-mutable struct Node
-    #Holds operators, variables, constants in a tree
-    degree::Integer #0 for constant/variable, 1 for cos/sin, 2 for +/* etc.
-    val::Union{Float32, Integer} #Either const value, or enumerates variable
-    constant::Bool #false if variable
-    op::Integer #enumerates operator (separately for degree=1,2)
-    l::Union{Node, Nothing}
-    r::Union{Node, Nothing}
-
-    Node(val::Float32) = new(0, val, true, 1, nothing, nothing)
-    Node(val::Integer) = new(0, val, false, 1, nothing, nothing)
-    Node(op::Integer, l::Node) = new(1, 0.0f0, false, op, l, nothing)
-    Node(op::Integer, l::Union{Float32, Integer}) = new(1, 0.0f0, false, op, Node(l), nothing)
-    Node(op::Integer, l::Node, r::Node) = new(2, 0.0f0, false, op, l, r)
-
-    #Allow to pass the leaf value without additional node call:
-    Node(op::Integer, l::Union{Float32, Integer}, r::Node) = new(2, 0.0f0, false, op, Node(l), r)
-    Node(op::Integer, l::Node, r::Union{Float32, Integer}) = new(2, 0.0f0, false, op, l, Node(r))
-    Node(op::Integer, l::Union{Float32, Integer}, r::Union{Float32, Integer}) = new(2, 0.0f0, false, op, Node(l), Node(r))
-end
-
-# Copy an equation (faster than deepcopy)
-function copyNode(tree::Node)::Node
-    if tree.degree == 0
-        return Node(tree.val)
-    elseif tree.degree == 1
-        return Node(tree.op, copyNode(tree.l))
-    else
-        return Node(tree.op, copyNode(tree.l), copyNode(tree.r))
-    end
-end
-
-# Count the operators, constants, variables in an equation
-function countNodes(tree::Node)::Integer
-    if tree.degree == 0
-        return 1
-    elseif tree.degree == 1
-        return 1 + countNodes(tree.l)
-    else
-        return 1 + countNodes(tree.l) + countNodes(tree.r)
-    end
-end
-
-# Count the max depth of a tree
-function countDepth(tree::Node)::Integer
-    if tree.degree == 0
-        return 1
-    elseif tree.degree == 1
-        return 1 + countDepth(tree.l)
-    else
-        return 1 + max(countDepth(tree.l), countDepth(tree.r))
-    end
-end
-
-# Convert an equation to a string
-function stringTree(tree::Node)::String
-    if tree.degree == 0
-        if tree.constant
-            return string(tree.val)
-        else
-            if useVarMap
-                return varMap[tree.val]
-            else
-                return "x$(tree.val - 1)"
-            end
-        end
-    elseif tree.degree == 1
-        return "$(unaops[tree.op])($(stringTree(tree.l)))"
-    else
-        return "$(binops[tree.op])($(stringTree(tree.l)), $(stringTree(tree.r)))"
-    end
-end
-
-# Print an equation
-function printTree(tree::Node)
-    println(stringTree(tree))
-end
-
-# Return a random node from the tree
-function randomNode(tree::Node)::Node
-    if tree.degree == 0
-        return tree
-    end
-    a = countNodes(tree)
-    b = 0
-    c = 0
-    if tree.degree >= 1
-        b = countNodes(tree.l)
-    end
-    if tree.degree == 2
-        c = countNodes(tree.r)
-    end
-
-    i = rand(1:1+b+c)
-    if i <= b
-        return randomNode(tree.l)
-    elseif i == b + 1
-        return tree
-    end
-
-    return randomNode(tree.r)
-end
-
-# Count the number of unary operators in the equation
-function countUnaryOperators(tree::Node)::Integer
-    if tree.degree == 0
-        return 0
-    elseif tree.degree == 1
-        return 1 + countUnaryOperators(tree.l)
-    else
-        return 0 + countUnaryOperators(tree.l) + countUnaryOperators(tree.r)
-    end
-end
-
-# Count the number of binary operators in the equation
-function countBinaryOperators(tree::Node)::Integer
-    if tree.degree == 0
-        return 0
-    elseif tree.degree == 1
-        return 0 + countBinaryOperators(tree.l)
-    else
-        return 1 + countBinaryOperators(tree.l) + countBinaryOperators(tree.r)
-    end
-end
-
-# Count the number of operators in the equation
-function countOperators(tree::Node)::Integer
-    return countUnaryOperators(tree) + countBinaryOperators(tree)
-end
-
-
-# Count the number of constants in an equation
-function countConstants(tree::Node)::Integer
-    if tree.degree == 0
-        return convert(Integer, tree.constant)
-    elseif tree.degree == 1
-        return 0 + countConstants(tree.l)
-    else
-        return 0 + countConstants(tree.l) + countConstants(tree.r)
-    end
-end
-
-# Get all the constants from a tree
-function getConstants(tree::Node)::Array{Float32, 1}
-    if tree.degree == 0
-        if tree.constant
-            return [tree.val]
-        else
-            return Float32[]
-        end
-    elseif tree.degree == 1
-        return getConstants(tree.l)
-    else
-        both = [getConstants(tree.l), getConstants(tree.r)]
-        return [constant for subtree in both for constant in subtree]
-    end
-end
-
-# Set all the constants inside a tree
-function setConstants(tree::Node, constants::Array{Float32, 1})
-    if tree.degree == 0
-        if tree.constant
-            tree.val = constants[1]
-        end
-    elseif tree.degree == 1
-        setConstants(tree.l, constants)
-    else
-        numberLeft = countConstants(tree.l)
-        setConstants(tree.l, constants)
-        setConstants(tree.r, constants[numberLeft+1:end])
-    end
-end
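To make the Node layout concrete, here is a small hedged example of building and inspecting a tree with the constructors above. It assumes a checkout from before this PR (the file is deleted here) and the handful of globals that Equation.jl expects; the operator tables below are placeholders normally set up by sr.jl:

```julia
# Build mult(cos(x0), 3.2) by hand using the deleted Equation.jl (pre-PR checkout).
const binops = [+, *]        # assumed operator tables
const unaops = [cos]
const useVarMap = false
include("julia/Equation.jl")

tree = Node(2, Node(1, Node(1)), Node(3.2f0))   # op 2 = *, op 1 = cos, variable 1, constant 3.2
println(countNodes(tree))    # 4
println(countDepth(tree))    # 3
printTree(tree)              # *(cos(x0), 3.2)
```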
julia/EvaluateEquation.jl
DELETED
@@ -1,47 +0,0 @@
-# Evaluate an equation over an array of datapoints
-function evalTreeArray(tree::Node)::Union{Array{Float32, 1}, Nothing}
-    return evalTreeArray(tree, X)
-end
-
-
-# Evaluate an equation over an array of datapoints
-function evalTreeArray(tree::Node, cX::Array{Float32, 2})::Union{Array{Float32, 1}, Nothing}
-    clen = size(cX)[1]
-    if tree.degree == 0
-        if tree.constant
-            return fill(tree.val, clen)
-        else
-            return copy(cX[:, tree.val])
-        end
-    elseif tree.degree == 1
-        cumulator = evalTreeArray(tree.l, cX)
-        if cumulator === nothing
-            return nothing
-        end
-        op_idx = tree.op
-        UNAOP!(cumulator, op_idx, clen)
-        @inbounds for i=1:clen
-            if isinf(cumulator[i]) || isnan(cumulator[i])
-                return nothing
-            end
-        end
-        return cumulator
-    else
-        cumulator = evalTreeArray(tree.l, cX)
-        if cumulator === nothing
-            return nothing
-        end
-        array2 = evalTreeArray(tree.r, cX)
-        if array2 === nothing
-            return nothing
-        end
-        op_idx = tree.op
-        BINOP!(cumulator, array2, op_idx, clen)
-        @inbounds for i=1:clen
-            if isinf(cumulator[i]) || isnan(cumulator[i])
-                return nothing
-            end
-        end
-        return cumulator
-    end
-end
julia/LossFunctions.jl
DELETED
@@ -1,82 +0,0 @@
-import Random: randperm
-
-# Sum of square error between two arrays
-function SSE(x::Array{Float32}, y::Array{Float32})::Float32
-    diff = (x - y)
-    return sum(diff .* diff)
-end
-function SSE(x::Nothing, y::Array{Float32})::Float32
-    return 1f9
-end
-
-# Sum of square error between two arrays, with weights
-function SSE(x::Array{Float32}, y::Array{Float32}, w::Array{Float32})::Float32
-    diff = (x - y)
-    return sum(diff .* diff .* w)
-end
-function SSE(x::Nothing, y::Array{Float32}, w::Array{Float32})::Float32
-    return Nothing
-end
-
-# Mean of square error between two arrays
-function MSE(x::Nothing, y::Array{Float32})::Float32
-    return 1f9
-end
-
-# Mean of square error between two arrays
-function MSE(x::Array{Float32}, y::Array{Float32})::Float32
-    return SSE(x, y)/size(x)[1]
-end
-
-# Mean of square error between two arrays
-function MSE(x::Nothing, y::Array{Float32}, w::Array{Float32})::Float32
-    return 1f9
-end
-
-# Mean of square error between two arrays
-function MSE(x::Array{Float32}, y::Array{Float32}, w::Array{Float32})::Float32
-    return SSE(x, y, w)/sum(w)
-end
-
-if weighted
-    const avgy = sum(y .* weights)/sum(weights)
-    const baselineMSE = MSE(y, convert(Array{Float32, 1}, ones(len) .* avgy), weights)
-else
-    const avgy = sum(y)/len
-    const baselineMSE = MSE(y, convert(Array{Float32, 1}, ones(len) .* avgy))
-end
-
-# Score an equation
-function scoreFunc(tree::Node)::Float32
-    prediction = evalTreeArray(tree)
-    if prediction === nothing
-        return 1f9
-    end
-    if weighted
-        mse = MSE(prediction, y, weights)
-    else
-        mse = MSE(prediction, y)
-    end
-    return mse / baselineMSE + countNodes(tree)*parsimony
-end
-
-# Score an equation with a small batch
-function scoreFuncBatch(tree::Node)::Float32
-    # batchSize
-    batch_idx = randperm(len)[1:batchSize]
-    batch_X = X[batch_idx, :]
-    prediction = evalTreeArray(tree, batch_X)
-    if prediction === nothing
-        return 1f9
-    end
-    size_adjustment = 1f0
-    batch_y = y[batch_idx]
-    if weighted
-        batch_w = weights[batch_idx]
-        mse = MSE(prediction, batch_y, batch_w)
-        size_adjustment = 1f0 * len / batchSize
-    else
-        mse = MSE(prediction, batch_y)
-    end
-    return size_adjustment * mse / baselineMSE + countNodes(tree)*parsimony
-end
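As a quick worked example of the score used above (numbers invented for illustration): a tree's MSE is normalized by the baseline MSE of always predicting the mean, then a complexity penalty of `parsimony` per node is added.

```julia
# Hypothetical numbers, just to show how scoreFunc combines its terms.
mse, baselineMSE, parsimony, nnodes = 0.02f0, 0.40f0, 1f-3, 7
score = mse / baselineMSE + nnodes * parsimony   # 0.05 + 0.007 ≈ 0.057f0
```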
julia/Mutate.jl
DELETED
@@ -1,124 +0,0 @@
-# Go through one mutation cycle
-function iterate(member::PopMember, T::Float32, curmaxsize::Integer, frequencyComplexity::Array{Float32, 1})::PopMember
-    prev = member.tree
-    tree = prev
-    #TODO - reconsider this
-    if batching
-        beforeLoss = scoreFuncBatch(prev)
-    else
-        beforeLoss = member.score
-    end
-
-    mutationChoice = rand()
-    #More constants => more likely to do constant mutation
-    weightAdjustmentMutateConstant = min(8, countConstants(prev))/8.0
-    cur_weights = copy(mutationWeights) .* 1.0
-    cur_weights[1] *= weightAdjustmentMutateConstant
-    n = countNodes(prev)
-    depth = countDepth(prev)
-
-    # If equation too big, don't add new operators
-    if n >= curmaxsize || depth >= maxdepth
-        cur_weights[3] = 0.0
-        cur_weights[4] = 0.0
-    end
-    cur_weights /= sum(cur_weights)
-    cweights = cumsum(cur_weights)
-
-    successful_mutation = false
-    #TODO: Currently we dont take this \/ into account
-    is_success_always_possible = true
-    attempts = 0
-    max_attempts = 10
-
-    #############################################
-    # Mutations
-    #############################################
-    while (!successful_mutation) && attempts < max_attempts
-        tree = copyNode(prev)
-        successful_mutation = true
-        if mutationChoice < cweights[1]
-            tree = mutateConstant(tree, T)
-
-            is_success_always_possible = true
-            # Mutating a constant shouldn't invalidate an already-valid function
-
-        elseif mutationChoice < cweights[2]
-            tree = mutateOperator(tree)
-
-            is_success_always_possible = true
-            # Can always mutate to the same operator
-
-        elseif mutationChoice < cweights[3]
-            if rand() < 0.5
-                tree = appendRandomOp(tree)
-            else
-                tree = prependRandomOp(tree)
-            end
-            is_success_always_possible = false
-            # Can potentially have a situation without success
-        elseif mutationChoice < cweights[4]
-            tree = insertRandomOp(tree)
-            is_success_always_possible = false
-        elseif mutationChoice < cweights[5]
-            tree = deleteRandomOp(tree)
-            is_success_always_possible = true
-        elseif mutationChoice < cweights[6]
-            tree = simplifyTree(tree) # Sometimes we simplify tree
-            tree = combineOperators(tree) # See if repeated constants at outer levels
-            return PopMember(tree, beforeLoss)
-
-            is_success_always_possible = true
-            # Simplification shouldn't hurt complexity; unless some non-symmetric constraint
-            # to commutative operator...
-
-        elseif mutationChoice < cweights[7]
-            tree = genRandomTree(5) # Sometimes we generate a new tree completely tree
-
-            is_success_always_possible = true
-        else # no mutation applied
-            return PopMember(tree, beforeLoss)
-        end
-
-        # Check for illegal equations
-        for i=1:nbin
-            if successful_mutation && flagBinOperatorComplexity(tree, i)
-                successful_mutation = false
-            end
-        end
-        for i=1:nuna
-            if successful_mutation && flagUnaOperatorComplexity(tree, i)
-                successful_mutation = false
-            end
-        end
-
-        attempts += 1
-    end
-    #############################################
-
-    if !successful_mutation
-        return PopMember(copyNode(prev), beforeLoss)
-    end
-
-    if batching
-        afterLoss = scoreFuncBatch(tree)
-    else
-        afterLoss = scoreFunc(tree)
-    end
-
-    if annealing
-        delta = afterLoss - beforeLoss
-        probChange = exp(-delta/(T*alpha))
-        if useFrequency
-            oldSize = countNodes(prev)
-            newSize = countNodes(tree)
-            probChange *= frequencyComplexity[oldSize] / frequencyComplexity[newSize]
-        end
-
-        return_unaltered = (isnan(afterLoss) || probChange < rand())
-        if return_unaltered
-            return PopMember(copyNode(prev), beforeLoss)
-        end
-    end
-    return PopMember(tree, afterLoss)
-end
julia/MutationFunctions.jl
DELETED
@@ -1,239 +0,0 @@
-# Randomly convert an operator into another one (binary->binary;
-# unary->unary)
-function mutateOperator(tree::Node)::Node
-    if countOperators(tree) == 0
-        return tree
-    end
-    node = randomNode(tree)
-    while node.degree == 0
-        node = randomNode(tree)
-    end
-    if node.degree == 1
-        node.op = rand(1:length(unaops))
-    else
-        node.op = rand(1:length(binops))
-    end
-    return tree
-end
-
-# Randomly perturb a constant
-function mutateConstant(
-        tree::Node, T::Float32,
-        probNegate::Float32=0.01f0)::Node
-    # T is between 0 and 1.
-
-    if countConstants(tree) == 0
-        return tree
-    end
-    node = randomNode(tree)
-    while node.degree != 0 || node.constant == false
-        node = randomNode(tree)
-    end
-
-    bottom = 0.1f0
-    maxChange = perturbationFactor * T + 1.0f0 + bottom
-    factor = maxChange^Float32(rand())
-    makeConstBigger = rand() > 0.5
-
-    if makeConstBigger
-        node.val *= factor
-    else
-        node.val /= factor
-    end
-
-    if rand() > probNegate
-        node.val *= -1
-    end
-
-    return tree
-end
-
-# Add a random unary/binary operation to the end of a tree
-function appendRandomOp(tree::Node)::Node
-    node = randomNode(tree)
-    while node.degree != 0
-        node = randomNode(tree)
-    end
-
-    choice = rand()
-    makeNewBinOp = choice < nbin/nops
-    if rand() > 0.5
-        left = Float32(randn())
-    else
-        left = rand(1:nvar)
-    end
-    if rand() > 0.5
-        right = Float32(randn())
-    else
-        right = rand(1:nvar)
-    end
-
-    if makeNewBinOp
-        newnode = Node(
-            rand(1:length(binops)),
-            left,
-            right
-        )
-    else
-        newnode = Node(
-            rand(1:length(unaops)),
-            left
-        )
-    end
-    node.l = newnode.l
-    node.r = newnode.r
-    node.op = newnode.op
-    node.degree = newnode.degree
-    node.val = newnode.val
-    node.constant = newnode.constant
-    return tree
-end
-
-# Insert random node
-function insertRandomOp(tree::Node)::Node
-    node = randomNode(tree)
-    choice = rand()
-    makeNewBinOp = choice < nbin/nops
-    left = copyNode(node)
-
-    if makeNewBinOp
-        right = randomConstantNode()
-        newnode = Node(
-            rand(1:length(binops)),
-            left,
-            right
-        )
-    else
-        newnode = Node(
-            rand(1:length(unaops)),
-            left
-        )
-    end
-    node.l = newnode.l
-    node.r = newnode.r
-    node.op = newnode.op
-    node.degree = newnode.degree
-    node.val = newnode.val
-    node.constant = newnode.constant
-    return tree
-end
-
-# Add random node to the top of a tree
-function prependRandomOp(tree::Node)::Node
-    node = tree
-    choice = rand()
-    makeNewBinOp = choice < nbin/nops
-    left = copyNode(tree)
-
-    if makeNewBinOp
-        right = randomConstantNode()
-        newnode = Node(
-            rand(1:length(binops)),
-            left,
-            right
-        )
-    else
-        newnode = Node(
-            rand(1:length(unaops)),
-            left
-        )
-    end
-    node.l = newnode.l
-    node.r = newnode.r
-    node.op = newnode.op
-    node.degree = newnode.degree
-    node.val = newnode.val
-    node.constant = newnode.constant
-    return node
-end
-
-function randomConstantNode()::Node
-    if rand() > 0.5
-        val = Float32(randn())
-    else
-        val = rand(1:nvar)
-    end
-    newnode = Node(val)
-    return newnode
-end
-
-# Return a random node from the tree with parent
-function randomNodeAndParent(tree::Node, parent::Union{Node, Nothing})::Tuple{Node, Union{Node, Nothing}}
-    if tree.degree == 0
-        return tree, parent
-    end
-    a = countNodes(tree)
-    b = 0
-    c = 0
-    if tree.degree >= 1
-        b = countNodes(tree.l)
-    end
-    if tree.degree == 2
-        c = countNodes(tree.r)
-    end
-
-    i = rand(1:1+b+c)
-    if i <= b
-        return randomNodeAndParent(tree.l, tree)
-    elseif i == b + 1
-        return tree, parent
-    end
-
-    return randomNodeAndParent(tree.r, tree)
-end
-
-# Select a random node, and replace it an the subtree
-# with a variable or constant
-function deleteRandomOp(tree::Node)::Node
-    node, parent = randomNodeAndParent(tree, nothing)
-    isroot = (parent === nothing)
-
-    if node.degree == 0
-        # Replace with new constant
-        newnode = randomConstantNode()
-        node.l = newnode.l
-        node.r = newnode.r
-        node.op = newnode.op
-        node.degree = newnode.degree
-        node.val = newnode.val
-        node.constant = newnode.constant
-    elseif node.degree == 1
-        # Join one of the children with the parent
-        if isroot
-            return node.l
-        elseif parent.l == node
-            parent.l = node.l
-        else
-            parent.r = node.l
-        end
-    else
-        # Join one of the children with the parent
-        if rand() < 0.5
-            if isroot
-                return node.l
-            elseif parent.l == node
-                parent.l = node.l
-            else
-                parent.r = node.l
-            end
-        else
-            if isroot
-                return node.r
-            elseif parent.l == node
-                parent.l = node.r
-            else
-                parent.r = node.r
-            end
-        end
-    end
-    return tree
-end
-
-# Create a random equation by appending random operators
-function genRandomTree(length::Integer)::Node
-    tree = Node(1.0f0)
-    for i=1:length
-        tree = appendRandomOp(tree)
-    end
-    return tree
-end
julia/Operators.jl
DELETED
@@ -1,56 +0,0 @@
-import SpecialFunctions: gamma, lgamma, erf, erfc, beta
-
-
-import Base.FastMath: sqrt_llvm_fast, neg_float_fast,
-    add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast,
-    eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast,
-    sign_fast, abs_fast, log_fast, log2_fast, log10_fast, sqrt_fast,
-    pow_fast
-
-# Implicitly defined:
-#binary: mod
-#unary: exp, abs, log1p, sin, cos, tan, sinh, cosh, tanh, asin, acos, atan, asinh, acosh, atanh, erf, erfc, gamma, relu, round, floor, ceil, round, sign.
-
-# Use some fast operators from https://github.com/JuliaLang/julia/blob/81597635c4ad1e8c2e1c5753fda4ec0e7397543f/base/fastmath.jl
-# Define allowed operators. Any julia operator can also be used.
-plus(x::Float32, y::Float32)::Float32 = add_float_fast(x, y) #Do not change the name of this operator.
-sub(x::Float32, y::Float32)::Float32 = sub_float_fast(x, y) #Do not change the name of this operator.
-mult(x::Float32, y::Float32)::Float32 = mul_float_fast(x, y) #Do not change the name of this operator.
-square(x::Float32)::Float32 = mul_float_fast(x, x)
-cube(x::Float32)::Float32 = mul_float_fast(mul_float_fast(x, x), x)
-pow(x::Float32, y::Float32)::Float32 = sign_fast(x)*pow_fast(abs(x), y)
-div(x::Float32, y::Float32)::Float32 = div_float_fast(x, y)
-logm(x::Float32)::Float32 = log_fast(abs_fast(x) + 1f-8)
-logm2(x::Float32)::Float32 = log2_fast(abs_fast(x) + 1f-8)
-logm10(x::Float32)::Float32 = log10_fast(abs_fast(x) + 1f-8)
-sqrtm(x::Float32)::Float32 = sqrt_fast(abs_fast(x))
-neg(x::Float32)::Float32 = neg_float_fast(x)
-
-function greater(x::Float32, y::Float32)::Float32
-    if x > y
-        return 1f0
-    end
-    return 0f0
-end
-
-function relu(x::Float32)::Float32
-    if x > 0f0
-        return x
-    end
-    return 0f0
-end
-
-function logical_or(x::Float32, y::Float32)::Float32
-    if x > 0f0 || y > 0f0
-        return 1f0
-    end
-    return 0f0
-end
-
-# (Just use multiplication normally)
-function logical_and(x::Float32, y::Float32)::Float32
-    if x > 0f0 && y > 0f0
-        return 1f0
-    end
-    return 0f0
-end
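Since the file notes that "any julia operator can also be used", a custom operator is just another Float32 -> Float32 function in the same protected style as `logm`/`sqrtm` above. The one below is a hypothetical example, not part of the original operator set:

```julia
# Hypothetical extra operator in the style of logm/sqrtm above:
# divides by |y| + 1f-8, which avoids division by zero (note it drops the sign of y).
safe_div(x::Float32, y::Float32)::Float32 = x / (abs(y) + 1f-8)
```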
julia/PopMember.jl
DELETED
@@ -1,10 +0,0 @@
-# Define a member of population by equation, score, and age
-mutable struct PopMember
-    tree::Node
-    score::Float32
-    birth::Integer
-
-    PopMember(t::Node) = new(t, scoreFunc(t), getTime())
-    PopMember(t::Node, score::Float32) = new(t, score, getTime())
-
-end
julia/Population.jl
DELETED
@@ -1,40 +0,0 @@
-# A list of members of the population, with easy constructors,
-# which allow for random generation of new populations
-mutable struct Population
-    members::Array{PopMember, 1}
-    n::Integer
-
-    Population(pop::Array{PopMember, 1}) = new(pop, size(pop)[1])
-    Population(npop::Integer) = new([PopMember(genRandomTree(3)) for i=1:npop], npop)
-    Population(npop::Integer, nlength::Integer) = new([PopMember(genRandomTree(nlength)) for i=1:npop], npop)
-
-end
-
-# Sample 10 random members of the population, and make a new one
-function samplePop(pop::Population)::Population
-    idx = rand(1:pop.n, ns)
-    return Population(pop.members[idx])
-end
-
-# Sample the population, and get the best member from that sample
-function bestOfSample(pop::Population)::PopMember
-    sample = samplePop(pop)
-    best_idx = argmin([sample.members[member].score for member=1:sample.n])
-    return sample.members[best_idx]
-end
-
-function finalizeScores(pop::Population)::Population
-    need_recalculate = batching
-    if need_recalculate
-        @inbounds @simd for member=1:pop.n
-            pop.members[member].score = scoreFunc(pop.members[member].tree)
-        end
-    end
-    return pop
-end
-
-# Return best 10 examples
-function bestSubPop(pop::Population; topn::Integer=10)::Population
-    best_idx = sortperm([pop.members[member].score for member=1:pop.n])
-    return Population(pop.members[best_idx[1:topn]])
-end
julia/ProgramConstants.jl
DELETED
@@ -1,9 +0,0 @@
-
-const maxdegree = 2
-const actualMaxsize = maxsize + maxdegree
-const len = size(X)[1]
-
-const nuna = size(unaops)[1]
-const nbin = size(binops)[1]
-const nops = nuna + nbin
-const nvar = size(X)[2];
julia/RegularizedEvolution.jl
DELETED
@@ -1,46 +0,0 @@
-import Random: shuffle!
-
-# Pass through the population several times, replacing the oldest
-# with the fittest of a small subsample
-function regEvolCycle(pop::Population, T::Float32, curmaxsize::Integer,
-        frequencyComplexity::Array{Float32, 1})::Population
-    # Batch over each subsample. Can give 15% improvement in speed; probably moreso for large pops.
-    # but is ultimately a different algorithm than regularized evolution, and might not be
-    # as good.
-    if fast_cycle
-        shuffle!(pop.members)
-        n_evol_cycles = round(Integer, pop.n/ns)
-        babies = Array{PopMember}(undef, n_evol_cycles)
-
-        # Iterate each ns-member sub-sample
-        @inbounds Threads.@threads for i=1:n_evol_cycles
-            best_score = Inf32
-            best_idx = 1+(i-1)*ns
-            # Calculate best member of the subsample:
-            for sub_i=1+(i-1)*ns:i*ns
-                if pop.members[sub_i].score < best_score
-                    best_score = pop.members[sub_i].score
-                    best_idx = sub_i
-                end
-            end
-            allstar = pop.members[best_idx]
-            babies[i] = iterate(allstar, T, curmaxsize, frequencyComplexity)
-        end
-
-        # Replace the n_evol_cycles-oldest members of each population
-        @inbounds for i=1:n_evol_cycles
-            oldest = argmin([pop.members[member].birth for member=1:pop.n])
-            pop.members[oldest] = babies[i]
-        end
-    else
-        for i=1:round(Integer, pop.n/ns)
-            allstar = bestOfSample(pop)
-            baby = iterate(allstar, T, curmaxsize, frequencyComplexity)
-            #printTree(baby.tree)
-            oldest = argmin([pop.members[member].birth for member=1:pop.n])
-            pop.members[oldest] = baby
-        end
-    end
-
-    return pop
-end
julia/SimplifyEquation.jl
DELETED
@@ -1,106 +0,0 @@
-# Simplify tree
-function combineOperators(tree::Node)::Node
-    # NOTE: (const (+*-) const) already accounted for. Call simplifyTree before.
-    # ((const + var) + const) => (const + var)
-    # ((const * var) * const) => (const * var)
-    # ((const - var) - const) => (const - var)
-    # (want to add anything commutative!)
-    # TODO - need to combine plus/sub if they are both there.
-    if tree.degree == 0
-        return tree
-    elseif tree.degree == 1
-        tree.l = combineOperators(tree.l)
-    elseif tree.degree == 2
-        tree.l = combineOperators(tree.l)
-        tree.r = combineOperators(tree.r)
-    end
-
-    top_level_constant = tree.degree == 2 && (tree.l.constant || tree.r.constant)
-    if tree.degree == 2 && (binops[tree.op] === mult || binops[tree.op] === plus) && top_level_constant
-        op = tree.op
-        # Put the constant in r. Need to assume var in left for simplification assumption.
-        if tree.l.constant
-            tmp = tree.r
-            tree.r = tree.l
-            tree.l = tmp
-        end
-        topconstant = tree.r.val
-        # Simplify down first
-        below = tree.l
-        if below.degree == 2 && below.op == op
-            if below.l.constant
-                tree = below
-                tree.l.val = binops[op](tree.l.val, topconstant)
-            elseif below.r.constant
-                tree = below
-                tree.r.val = binops[op](tree.r.val, topconstant)
-            end
-        end
-    end
-
-    if tree.degree == 2 && binops[tree.op] === sub && top_level_constant
-        # Currently just simplifies subtraction. (can't assume both plus and sub are operators)
-        # Not commutative, so use different op.
-        if tree.l.constant
-            if tree.r.degree == 2 && binops[tree.r.op] === sub
-                if tree.r.l.constant
-                    #(const - (const - var)) => (var - const)
-                    l = tree.l
-                    r = tree.r
-                    simplified_const = -(l.val - r.l.val) #neg(sub(l.val, r.l.val))
-                    tree.l = tree.r.r
-                    tree.r = l
-                    tree.r.val = simplified_const
-                elseif tree.r.r.constant
-                    #(const - (var - const)) => (const - var)
-                    l = tree.l
-                    r = tree.r
-                    simplified_const = l.val + r.r.val #plus(l.val, r.r.val)
-                    tree.r = tree.r.l
-                    tree.l.val = simplified_const
-                end
-            end
-        else #tree.r.constant is true
-            if tree.l.degree == 2 && binops[tree.l.op] === sub
-                if tree.l.l.constant
-                    #((const - var) - const) => (const - var)
-                    l = tree.l
-                    r = tree.r
-                    simplified_const = l.l.val - r.val #sub(l.l.val, r.val)
-                    tree.r = tree.l.r
-                    tree.l = r
-                    tree.l.val = simplified_const
-                elseif tree.l.r.constant
-                    #((var - const) - const) => (var - const)
-                    l = tree.l
-                    r = tree.r
-                    simplified_const = r.val + l.r.val #plus(r.val, l.r.val)
-                    tree.l = tree.l.l
-                    tree.r.val = simplified_const
-                end
-            end
-        end
-    end
-    return tree
-end
-
-# Simplify tree
-function simplifyTree(tree::Node)::Node
-    if tree.degree == 1
-        tree.l = simplifyTree(tree.l)
-        if tree.l.degree == 0 && tree.l.constant
-            return Node(unaops[tree.op](tree.l.val))
-        end
-    elseif tree.degree == 2
-        tree.l = simplifyTree(tree.l)
-        tree.r = simplifyTree(tree.r)
-        constantsBelow = (
-            tree.l.degree == 0 && tree.l.constant &&
-            tree.r.degree == 0 && tree.r.constant
-        )
-        if constantsBelow
-            return Node(binops[tree.op](tree.l.val, tree.r.val))
-        end
-    end
-    return tree
-end
julia/SingleIteration.jl
DELETED
@@ -1,28 +0,0 @@
-# Cycle through regularized evolution many times,
-# printing the fittest equation every 10% through
-function run(
-        pop::Population,
-        ncycles::Integer,
-        curmaxsize::Integer,
-        frequencyComplexity::Array{Float32, 1};
-        verbosity::Integer=0
-       )::Population
-
-    allT = LinRange(1.0f0, 0.0f0, ncycles)
-    for iT in 1:size(allT)[1]
-        if annealing
-            pop = regEvolCycle(pop, allT[iT], curmaxsize, frequencyComplexity)
-        else
-            pop = regEvolCycle(pop, 1.0f0, curmaxsize, frequencyComplexity)
-        end
-
-        if verbosity > 0 && (iT % verbosity == 0)
-            bestPops = bestSubPop(pop)
-            bestCurScoreIdx = argmin([bestPops.members[member].score for member=1:bestPops.n])
-            bestCurScore = bestPops.members[bestCurScoreIdx].score
-            debug(verbosity, bestCurScore, " is the score for ", stringTree(bestPops.members[bestCurScoreIdx].tree))
-        end
-    end
-
-    return pop
-end
julia/Utils.jl
DELETED
@@ -1,34 +0,0 @@
-import Printf: @printf
-
-function id(x::Float32)::Float32
-    x
-end
-
-function debug(verbosity, string...)
-    verbosity > 0 ? println(string...) : nothing
-end
-
-function getTime()::Integer
-    return round(Integer, 1e3*(time()-1.6e9))
-end
-
-# Check for errors before they happen
-function testConfiguration()
-    test_input = LinRange(-100f0, 100f0, 99)
-
-    try
-        for left in test_input
-            for right in test_input
-                for binop in binops
-                    test_output = binop.(left, right)
-                end
-            end
-            for unaop in unaops
-                test_output = unaop.(left)
-            end
-        end
-    catch error
-        @printf("\n\nYour configuration is invalid - one of your operators is not well-defined over the real line.\n\n\n")
-        throw(error)
-    end
-end
julia/halloffame.jl
DELETED
@@ -1,8 +0,0 @@
-# List of the best members seen all time
-mutable struct HallOfFame
-    members::Array{PopMember, 1}
-    exists::Array{Bool, 1} #Whether it has been set
-
-    # Arranged by complexity - store one at each.
-    HallOfFame() = new([PopMember(Node(1f0), 1f9) for i=1:actualMaxsize], [false for i=1:actualMaxsize])
-end
julia/sr.jl
DELETED
@@ -1,218 +0,0 @@
-import Printf: @printf
-
-function fullRun(niterations::Integer;
-        npop::Integer=300,
-        ncyclesperiteration::Integer=3000,
-        fractionReplaced::Float32=0.1f0,
-        verbosity::Integer=0,
-        topn::Integer=10
-       )
-
-    testConfiguration()
-
-    # 1. Start a population on every process
-    allPops = Future[]
-    # Set up a channel to send finished populations back to head node
-    channels = [RemoteChannel(1) for j=1:npopulations]
-    bestSubPops = [Population(1) for j=1:npopulations]
-    hallOfFame = HallOfFame()
-    frequencyComplexity = ones(Float32, actualMaxsize)
-    curmaxsize = 3
-    if warmupMaxsize == 0
-        curmaxsize = maxsize
-    end
-
-    for i=1:npopulations
-        future = @spawnat :any Population(npop, 3)
-        push!(allPops, future)
-    end
-
-    # # 2. Start the cycle on every process:
-    @sync for i=1:npopulations
-        @async allPops[i] = @spawnat :any run(fetch(allPops[i]), ncyclesperiteration, curmaxsize, copy(frequencyComplexity)/sum(frequencyComplexity), verbosity=verbosity)
-    end
-    println("Started!")
-    cycles_complete = npopulations * niterations
-    if warmupMaxsize != 0
-        curmaxsize += 1
-        if curmaxsize > maxsize
-            curmaxsize = maxsize
-        end
-    end
-
-    last_print_time = time()
-    num_equations = 0.0
-    print_every_n_seconds = 5
-    equation_speed = Float32[]
-
-    for i=1:npopulations
-        # Start listening for each population to finish:
-        @async put!(channels[i], fetch(allPops[i]))
-    end
-
-    while cycles_complete > 0
-        @inbounds for i=1:npopulations
-            # Non-blocking check if a population is ready:
-            if isready(channels[i])
-                # Take the fetch operation from the channel since its ready
-                cur_pop = take!(channels[i])
-                bestSubPops[i] = bestSubPop(cur_pop, topn=topn)
-
-                #Try normal copy...
-                bestPops = Population([member for pop in bestSubPops for member in pop.members])
-
-                for member in cur_pop.members
-                    size = countNodes(member.tree)
-                    frequencyComplexity[size] += 1
-                    if member.score < hallOfFame.members[size].score
-                        hallOfFame.members[size] = deepcopy(member)
-                        hallOfFame.exists[size] = true
-                    end
-                end
-
-                # Dominating pareto curve - must be better than all simpler equations
-                dominating = PopMember[]
-                open(hofFile, "w") do io
-                    println(io,"Complexity|MSE|Equation")
-                    for size=1:actualMaxsize
-                        if hallOfFame.exists[size]
-                            member = hallOfFame.members[size]
-                            if weighted
-                                curMSE = MSE(evalTreeArray(member.tree), y, weights)
-                            else
-                                curMSE = MSE(evalTreeArray(member.tree), y)
-                            end
-                            numberSmallerAndBetter = 0
-                            for i=1:(size-1)
-                                if weighted
-                                    hofMSE = MSE(evalTreeArray(hallOfFame.members[i].tree), y, weights)
-                                else
-                                    hofMSE = MSE(evalTreeArray(hallOfFame.members[i].tree), y)
-                                end
-                                if (hallOfFame.exists[size] && curMSE > hofMSE)
-                                    numberSmallerAndBetter += 1
-                                end
-                            end
-                            betterThanAllSmaller = (numberSmallerAndBetter == 0)
-                            if betterThanAllSmaller
-                                println(io, "$size|$(curMSE)|$(stringTree(member.tree))")
-                                push!(dominating, member)
-                            end
-                        end
-                    end
-                end
-                cp(hofFile, hofFile*".bkup", force=true)
-
-                # Try normal copy otherwise.
-                if migration
-                    for k in rand(1:npop, round(Integer, npop*fractionReplaced))
-                        to_copy = rand(1:size(bestPops.members)[1])
-                        cur_pop.members[k] = PopMember(
-                            copyNode(bestPops.members[to_copy].tree),
-                            bestPops.members[to_copy].score)
-                    end
-                end
-
-                if hofMigration && size(dominating)[1] > 0
-                    for k in rand(1:npop, round(Integer, npop*fractionReplacedHof))
-                        # Copy in case one gets used twice
-                        to_copy = rand(1:size(dominating)[1])
-                        cur_pop.members[k] = PopMember(
-                            copyNode(dominating[to_copy].tree)
-                        )
-                    end
-                end
-
-                @async begin
-                    allPops[i] = @spawnat :any let
-                        tmp_pop = run(cur_pop, ncyclesperiteration, curmaxsize, copy(frequencyComplexity)/sum(frequencyComplexity), verbosity=verbosity)
-                        @inbounds @simd for j=1:tmp_pop.n
-                            if rand() < 0.1
-                                tmp_pop.members[j].tree = simplifyTree(tmp_pop.members[j].tree)
-                                tmp_pop.members[j].tree = combineOperators(tmp_pop.members[j].tree)
-                                if shouldOptimizeConstants
-                                    tmp_pop.members[j] = optimizeConstants(tmp_pop.members[j])
-                                end
-                            end
-                        end
-                        tmp_pop = finalizeScores(tmp_pop)
-                        tmp_pop
-                    end
-                    put!(channels[i], fetch(allPops[i]))
-                end
-
-                cycles_complete -= 1
-                cycles_elapsed = npopulations * niterations - cycles_complete
-                if warmupMaxsize != 0 && cycles_elapsed % warmupMaxsize == 0
-                    curmaxsize += 1
-                    if curmaxsize > maxsize
-                        curmaxsize = maxsize
-                    end
-                end
-                num_equations += ncyclesperiteration * npop / 10.0
-            end
-        end
-        sleep(1e-3)
-        elapsed = time() - last_print_time
-        #Update if time has passed, and some new equations generated.
-        if elapsed > print_every_n_seconds && num_equations > 0.0
-            # Dominating pareto curve - must be better than all simpler equations
-            current_speed = num_equations/elapsed
-            average_over_m_measurements = 10 #for print_every...=5, this gives 50 second running average
-            push!(equation_speed, current_speed)
-            if length(equation_speed) > average_over_m_measurements
-                deleteat!(equation_speed, 1)
-            end
-            average_speed = sum(equation_speed)/length(equation_speed)
-            curMSE = baselineMSE
-            lastMSE = curMSE
-            lastComplexity = 0
-            if verbosity > 0
-                @printf("\n")
-                @printf("Cycles per second: %.3e\n", round(average_speed, sigdigits=3))
-                cycles_elapsed = npopulations * niterations - cycles_complete
-                @printf("Progress: %d / %d total iterations (%.3f%%)\n", cycles_elapsed, npopulations * niterations, 100.0*cycles_elapsed/(npopulations*niterations))
-                @printf("Hall of Fame:\n")
-                @printf("-----------------------------------------\n")
-                @printf("%-10s %-8s %-8s %-8s\n", "Complexity", "MSE", "Score", "Equation")
-                @printf("%-10d %-8.3e %-8.3e %-.f\n", 0, curMSE, 0f0, avgy)
-            end
-
-            for size=1:actualMaxsize
-                if hallOfFame.exists[size]
-                    member = hallOfFame.members[size]
-                    if weighted
-                        curMSE = MSE(evalTreeArray(member.tree), y, weights)
-                    else
-                        curMSE = MSE(evalTreeArray(member.tree), y)
-                    end
-                    numberSmallerAndBetter = 0
-                    for i=1:(size-1)
-                        if weighted
-                            hofMSE = MSE(evalTreeArray(hallOfFame.members[i].tree), y, weights)
-                        else
-                            hofMSE = MSE(evalTreeArray(hallOfFame.members[i].tree), y)
-                        end
-                        if (hallOfFame.exists[size] && curMSE > hofMSE)
-                            numberSmallerAndBetter += 1
-                        end
-                    end
-                    betterThanAllSmaller = (numberSmallerAndBetter == 0)
-                    if betterThanAllSmaller
-                        delta_c = size - lastComplexity
-                        delta_l_mse = log(curMSE/lastMSE)
-                        score = convert(Float32, -delta_l_mse/delta_c)
-                        if verbosity > 0
-                            @printf("%-10d %-8.3e %-8.3e %-s\n", size, curMSE, score, stringTree(member.tree))
-                        end
-                        lastMSE = curMSE
-                        lastComplexity = size
-                    end
-                end
-            end
-            debug(verbosity, "")
-            last_print_time = time()
-            num_equations = 0.0
-        end
-    end
-end
julia/truth.jl
DELETED
@@ -1,77 +0,0 @@
|
|
-# *** Custom Functions
-##################################################################################################################################
-# *** Will somewhere need to define a list TRUTHS of all valid auxliary truths
-struct Transformation
-    type::Integer # 1 is symmetry, 2 is zero, 3 is equality
-    params::Array{Int32}
-    Transformation(type::Integer, params::Array{Int32}) = new(type, params)
-    Transformation(type::Integer, params::Array{Int64}) = new(type, params)
-
-end
-struct Truth
-    transformation::Transformation
-    weights::Array{Float32}
-    Truth(transformation::Transformation, weights::Array{Float32}) = new(transformation, weights)
-    Truth(type::Int64, params::Array{Int64}, weights::Array{Float32}) = new(Transformation(type, params), weights)
-    Truth(transformation::Transformation, weights::Array{Float64}) = new(transformation, weights)
-    Truth(type::Int64, params::Array{Int64}, weights::Array{Float64}) = new(Transformation(type, params), weights)
-end
-# Returns a linear combination when given X of shape nxd, y of shape nx1 is f(x) and w of shape d+2x1, result is shape nx1
-function LinearPrediction(cX::Array{Float32}, cy::Array{Float32}, w::Array{Float32})::Array{Float32}
-    preds = 0
-    for i in 1:ndims(cX)
-        preds = preds .+ cX[:,i].*w[i]
-    end
-    preds = preds .+ cy.*w[ndims(cX)+1]
-    return preds .+ w[ndims(cX)+2]
-end
-
-# Returns a copy of the data with the two specified columns swapped
-function swapColumns(cX::Array{Float32, 2}, a::Integer, b::Integer)::Array{Float32, 2}
-    X1 = copy(cX)
-    X1[:, a] = cX[:, b]
-    X1[:, b] = cX[:, a]
-    return X1
-end
-
-# Returns a copy of the data with the specified integers in the list set to value given
-function setVal(cX::Array{Float32, 2}, a::Array{Int32, 1}, val::Float32)::Array{Float32, 2}
-    X1 = copy(cX)
-    for i in 1:size(a)[1]
-        X1[:, a[i]] = fill!(cX[:, a[i]], val)
-    end
-    return X1
-end
-
-# Returns a copy of the data with the specified integer indices in the list set to the first item of that list
-function setEq(cX::Array{Float32, 2}, a::Array{Int32, 1})::Array{Float32, 2}
-    X1 = copy(cX)
-    val = X1[:, a[1]]
-    for i in 1:size(a)[1]
-        X1[:, a[i]] = val
-    end
-    return X1
-end
-
-# Takes in a dataset and returns the transformed version of it as per the specified type and parameters
-function transform(cX::Array{Float32, 2}, transformation::Transformation)::Array{Float32, 2}
-    if transformation.type==1 # then symmetry
-        a = transformation.params[1]
-        b = transformation.params[2]
-        return swapColumns(cX, a, b)
-    elseif transformation.type==2 # then zero condition
-        return setVal(cX, transformation.params, Float32(0))
-    elseif transformation.type == 3 # then equality condition
-        return setEq(cX, transformation.params)
-    else # Then error return X
-        return cX
-    end
-end
-function transform(cX::Array{Float32, 2}, truth::Truth)::Array{Float32, 2}
-    return transform(cX, truth.transformation)
-end
-
-# Takes in X that has been transformed and returns what the Truth projects the target values should be
-function truthPrediction(X_transformed::Array{Float32, 2}, cy::Array{Float32}, truth::Truth)::Array{Float32}
-    return LinearPrediction(X_transformed, cy, truth.weights)
-end
|
julia/truthPops.jl
DELETED
@@ -1,170 +0,0 @@
-# Returns the MSE between the predictions and the truth provided targets for the given dataset
-function truthScore(member::PopMember, cX::Array{Float32, 2}, cy::Array{Float32}, truth::Truth)::Float32
-    transformed = transform(cX, truth)
-    targets = truthPrediction(transformed, cy, truth)
-    preds = evalTreeArray(member.tree, transformed)
-    return MSE(preds, targets)
-end
-
-# Assumes a dataset X, y for a given truth
-function truthScore(member::PopMember, truth::Truth)::Float32
-    return truthScore(member, X, y, truth)
-end
-
-# Assumes a list of Truths TRUTHS is defined. Performs the truthScore function for each of them and returns the average
-function truthScore(member::PopMember, cX::Array{Float32, 2}, cy::Array{Float32})::Float32
-    s = 0
-    for truth in TRUTHS
-        s += (truthScore(member, cX, cy, truth))/size(TRUTHS)[1]
-    end
-    return s
-end
-
-# Assumes list of Truths TRUTHS and dataset X, y are defined
-function truthScore(member::PopMember)::Float32
-    return truthScore(member, X, y)
-end
-# Returns the MSE between the predictions and the truth provided targets for the given dataset
-function truthScore(tree::Node, cX::Array{Float32, 2}, cy::Array{Float32}, truth::Truth)::Float32
-    transformed = transform(cX, truth)
-    targets = truthPrediction(transformed, cy, truth)
-    preds = evalTreeArray(tree, transformed)
-    return MSE(preds, targets)
-end
-
-# Assumes a dataset X, y for a given truth
-function truthScore(tree::Node, truth::Truth)::Float32
-    return truthScore(tree, X, y, truth)
-end
-
-# Assumes a list of Truths TRUTHS is defined. Performs the truthScore function for each of them and returns the average
-function truthScore(tree::Node, cX::Array{Float32, 2}, cy::Array{Float32})::Float32
-    s = 0
-    for truth in TRUTHS
-        s += (truthScore(tree, cX, cy, truth))/size(TRUTHS)[1]
-    end
-    return s
-end
-
-# Assumes list of Truths TRUTHS and dataset X, y are defined
-function truthScore(tree::Node)::Float32
-    return truthScore(tree, X, y)
-end
-
-# Returns true iff Truth Score is below a given threshold i.e truth is satisfied
-function testTruth(member::PopMember, truth::Truth, threshold::Float32=Float32(1.0e-8))::Bool
-    truthError = truthScore(member, truth)
-    #print(stringTree(member.tree), "\n")
-    #print(truth, ": ")
-    #print(truthError, "\n")
-    if truthError > threshold
-        #print("Returns False \n ----\n")
-        return false
-    else
-        #print("Returns True \n ----\n")
-        return true
-    end
-end
-
-# Returns a list of violating functions from assumed list TRUTHS
-function violatingTruths(member::PopMember)::Array{Truth}
-    return violatingTruths(member.tree)
-end
-
-# Returns true iff Truth Score is below a given threshold i.e truth is satisfied
-function testTruth(tree::Node, truth::Truth, threshold::Float32=Float32(1.0e-3))::Bool
-    truthError = truthScore(tree, truth)
-    if truthError > threshold
-        return false
-    else
-        return true
-    end
-end
-
-# Returns a list of violating functions from assumed list TRUTHS
-function violatingTruths(tree::Node)::Array{Truth}
-    toReturn = []
-    #print("\n Checking Equation ", stringTree(tree), "\n")
-    for truth in TRUTHS
-        test_truth = testTruth(tree, truth)
-        #print("Truth: ", truth, ": " , test_truth, "\n-----\n")
-        if !test_truth
-            append!(toReturn, [truth])
-        end
-    end
-    return toReturn
-end
-
-function randomIndex(cX::Array{Float32, 2}, k::Integer=10)::Array{Int32, 1}
-    indxs = sample([Int32(i) for i in 1:size(cX)[1]], k)
-    return indxs
-end
-
-function randomIndex(leng::Integer, k::Integer=10)::Array{Int32, 1}
-    indxs = sample([Int32(i) for i in 1:leng], k)
-    return indxs
-end
-
-function extendedX(cX::Array{Float32, 2}, truth::Truth, indx::Array{Int32, 1})::Array{Float32, 2}
-    workingcX = copy(cX)
-    X_slice = workingcX[indx, :]
-    X_transformed = transform(X_slice, truth)
-    return X_transformed
-end
-function extendedX(truth::Truth, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
-    return extendedX(OGX, truth, indx)
-end
-function extendedX(cX::Array{Float32, 2}, violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
-    if length(violatedTruths) == 0
-        return nothing
-    end
-    workingX = extendedX(cX, violatedTruths[1], indx)
-    for truth in violatedTruths[2:length(violatedTruths)]
-        workingX = vcat(workingX, extendedX(cX, truth, indx))
-    end
-    return workingX
-end
-function extendedX(violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
-    return extendedX(OGX, violatedTruths, indx)
-end
-function extendedX(tree::Node, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
-    violatedTruths = violatingTruths(tree)
-    return extendedX(violatedTruths, indx)
-end
-function extendedX(member::PopMember, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
-    return extendedX(member.tree, indx)
-end
-
-
-function extendedy(cX::Array{Float32, 2}, cy::Array{Float32}, truth::Truth, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
-    cy = copy(cy)
-    cX = copy(cX)
-    X_slice = cX[indx, :]
-    y_slice = cy[indx]
-    X_transformed = transform(X_slice, truth)
-    y_transformed = truthPrediction(X_transformed, y_slice, truth)
-    return y_transformed
-end
-function extendedy(truth::Truth, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
-    return extendedy(OGX, OGy, truth, indx)
-end
-function extendedy(cX::Array{Float32, 2}, cy::Array{Float32}, violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
-    if length(violatedTruths) == 0
-        return nothing
-    end
-    workingy = extendedy(cX, cy, violatedTruths[1], indx)
-    for truth in violatedTruths[2:length(violatedTruths)]
-        workingy = vcat(workingy, extendedy(cX, cy, truth, indx))
-    end
-    return workingy
-end
-function extendedy(violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
-    return extendedy(OGX,OGy, violatedTruths, indx)
-end
-function extendedy(tree::Node, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
-    violatedTruths = violatingTruths(tree)
-    return extendedy(violatedTruths, indx)
-end
-function extendedy(member::PopMember, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
-    return extendedy(member.tree, indx)
-end
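The two deleted files, `julia/truth.jl` and `julia/truthPops.jl`, implemented an experimental "auxiliary truths" mechanism: a `Truth` pairs a dataset transformation (swap two columns, zero columns out, or force columns equal) with a linear prediction for the transformed targets, and candidate equations were penalised by the MSE of their violations. A rough NumPy sketch of the idea, using illustrative names rather than anything shipped with PySR:

```python
import numpy as np

def swap_columns(X, a, b):
    """Symmetry transformation: return a copy of X with columns a and b exchanged."""
    X1 = X.copy()
    X1[:, [a, b]] = X1[:, [b, a]]
    return X1

def truth_violation(predict, X, y, a=0, b=1):
    """MSE between predictions on the transformed data and the targets the truth
    expects there; for an exact symmetry the expected targets are y itself."""
    X_t = swap_columns(X, a, b)
    return float(np.mean((predict(X_t) - y) ** 2))

# Example: f(x0, x1) = x0 * x1 is symmetric in its first two inputs,
# so swapping them should not change the prediction.
X = np.random.randn(100, 3)
y = X[:, 0] * X[:, 1]
print(truth_violation(lambda Z: Z[:, 0] * Z[:, 1], X, y))  # ~0.0
```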
|
pysr/sr.py
CHANGED
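The diff below drops the old mechanism of copying individual `.jl` source files into the temp directory and instead writes a small runfile that activates a Julia project (the bundled `Project.toml` by default), installs `SymbolicRegression` on first use, and calls `EquationSearch`. Two keyword arguments are added to `pysr`: `julia_project` and `user_input`. A hedged usage sketch, assuming the usual `from pysr import pysr` entry point (dataset and operator choices are only illustrative):

```python
import numpy as np
from pysr import pysr

X = np.random.randn(100, 5)
y = 2.5382 * np.cos(X[:, 3]) + X[:, 0] ** 2

# julia_project and user_input are the new keyword arguments from this diff.
equations = pysr(X, y,
                 niterations=5,
                 binary_operators=["plus", "mult"],
                 unary_operators=["cos"],
                 julia_project=None,   # default: use the Project.toml shipped with PySR
                 user_input=False)     # don't prompt before installing SymbolicRegression.jl
print(equations)
```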
@@ -100,13 +100,13 @@ def pysr(X=None, y=None, weights=None,
         useFrequency=False,
         tempdir=None,
         delete_tempfiles=True,
-        limitPowComplexity=False, #deprecated
-        threads=None, #deprecated
         julia_optimization=3,
+        julia_project=None,
+        user_input=True
 ):
     """Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
     Note: most default parameters have been tuned over several example
-    equations, but you should adjust `
+    equations, but you should adjust `niterations`,
     `binary_operators`, `unary_operators` to your requirements.

     :param X: np.ndarray or pandas.DataFrame, 2D array. Rows are examples,
@@ -188,12 +188,16 @@ def pysr(X=None, y=None, weights=None,
     :param julia_optimization: int, Optimization level (0, 1, 2, 3)
     :param tempdir: str or None, directory for the temporary files
     :param delete_tempfiles: bool, whether to delete the temporary files after finishing
+    :param julia_project: str or None, a Julia environment location containing
+        a Project.toml (and potentially the source code for SymbolicRegression.jl).
+        Default gives the Python package directory, where a Project.toml file
+        should be present from the install.
+    :param user_input: Whether to ask for user input or not for installing (to
+        be used for automated scripts). Will choose to install when asked.
     :returns: pd.DataFrame, Results dataframe, giving complexity, MSE, and equations
         (as strings).

     """
-    _raise_depreciation_errors(limitPowComplexity, threads)
-
     if isinstance(X, pd.DataFrame):
         variable_names = list(X.columns)
         X = np.array(X)
@@ -239,8 +243,7 @@ def pysr(X=None, y=None, weights=None,
                   topn=topn, verbosity=verbosity,
                   julia_optimization=julia_optimization, timeout=timeout,
                   fractionReplacedHof=fractionReplacedHof,
-                  hofMigration=hofMigration,
-                  limitPowComplexity=limitPowComplexity, maxdepth=maxdepth,
+                  hofMigration=hofMigration, maxdepth=maxdepth,
                   maxsize=maxsize, migration=migration, nrestarts=nrestarts,
                   parsimony=parsimony, perturbationFactor=perturbationFactor,
                   populations=populations, procs=procs,
@@ -257,17 +260,24 @@ def pysr(X=None, y=None, weights=None,
                   weightRandomize=weightRandomize,
                   weightSimplify=weightSimplify,
                   constraints=constraints,
-                  extra_sympy_mappings=extra_sympy_mappings
+                  extra_sympy_mappings=extra_sympy_mappings,
+                  julia_project=julia_project)

     kwargs = {**_set_paths(tempdir), **kwargs}

-    kwargs['
+    pkg_directory = kwargs['pkg_directory']
+    kwargs['need_install'] = False
+    if not (pkg_directory / 'Manifest.toml').is_file():
+        kwargs['need_install'] = (not user_input) or _yesno("I will install Julia packages using PySR's Project.toml file. OK?")
+        if kwargs['need_install']:
+            print("OK. I will install at launch.")
+
+    kwargs['def_hyperparams'] = _create_inline_operators(**kwargs)

     _handle_constraints(**kwargs)

     kwargs['constraints_str'] = _make_constraints_str(**kwargs)
     kwargs['def_hyperparams'] = _make_hyperparams_julia_str(**kwargs)
-    kwargs['def_auxiliary'] = _make_auxiliary_julia_str(**kwargs)
     kwargs['def_datasets'] = _make_datasets_julia_str(**kwargs)

     _create_julia_files(**kwargs)
@@ -280,13 +290,6 @@ def pysr(X=None, y=None, weights=None,
     return get_hof(**kwargs)


-def _make_auxiliary_julia_str(julia_auxiliary_filenames, **kwargs):
-    def_auxiliary = '\n'.join([
-        f"""include("{_escape_filename(aux_fname)}")""" for aux_fname in julia_auxiliary_filenames
-    ])
-    return def_auxiliary
-
-
 def _set_globals(X, equation_file, extra_sympy_mappings, variable_names, **kwargs):
     global global_n_features
     global global_equation_file
@@ -298,14 +301,16 @@ def _set_globals(X, equation_file, extra_sympy_mappings, variable_names, **kwarg
     global_extra_sympy_mappings = extra_sympy_mappings


-def _final_pysr_process(julia_optimization,
+def _final_pysr_process(julia_optimization, runfile_filename, timeout, **kwargs):
     command = [
         f'julia', f'-O{julia_optimization:d}',
-        f'-p', f'{procs}',
         str(runfile_filename),
     ]
     if timeout is not None:
         command = [f'timeout', f'{timeout}'] + command
+    _cmd_runner(command)
+
+def _cmd_runner(command):
     print("Running on", ' '.join(command))
     process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1)
     try:
@@ -320,73 +325,108 @@ def _final_pysr_process(julia_optimization, procs, runfile_filename, timeout, **
         print("Killing process... will return when done.")
         process.kill()

-
-
-
+def _create_julia_files(dataset_filename, def_datasets, hyperparam_filename, def_hyperparams,
+                        fractionReplaced, ncyclesperiteration, niterations, npop,
+                        runfile_filename, topn, verbosity, julia_project, procs, weights,
+                        X, variable_names, pkg_directory, need_install, **kwargs):
     with open(hyperparam_filename, 'w') as f:
         print(def_hyperparams, file=f)
     with open(dataset_filename, 'w') as f:
         print(def_datasets, file=f)
-    with open(auxiliary_filename, 'w') as f:
-        print(def_auxiliary, file=f)
     with open(runfile_filename, 'w') as f:
-
-
-
-
-        print(
-
-
-
+        if julia_project is None:
+            julia_project = pkg_directory
+        else:
+            julia_project = Path(julia_project)
+        print(f'import Pkg', file=f)
+        print(f'Pkg.activate("{_escape_filename(julia_project)}")', file=f)
+        if need_install:
+            print(f'Pkg.add("SymbolicRegression")', file=f)
+            print(f'Pkg.instantiate()', file=f)
+            print(f'Pkg.precompile()', file=f)
+        print(f'using SymbolicRegression', file=f)
+        print(f'include("{_escape_filename(hyperparam_filename)}")', file=f)
+        print(f'include("{_escape_filename(dataset_filename)}")', file=f)
+        if len(variable_names) == 0:
+            varMap = "[" + ",".join([f'"x{i}"' for i in range(X.shape[1])]) + "]"
+        else:
+            varMap = "[" + ",".join(variable_names) + "]"
+
+        if weights is not None:
+            print(f'EquationSearch(X, y, weights=weights, niterations={niterations:d}, varMap={varMap}, options=options, numprocs={procs})', file=f)
+        else:
+            print(f'EquationSearch(X, y, niterations={niterations:d}, varMap={varMap}, options=options, numprocs={procs})', file=f)


 def _make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename, **kwargs):
     def_datasets = """using DelimitedFiles"""
     np.savetxt(X_filename, X, delimiter=',')
-    np.savetxt(y_filename, y, delimiter=',')
+    np.savetxt(y_filename, y.reshape(-1, 1), delimiter=',')
     if weights is not None:
-        np.savetxt(weights_filename, weights, delimiter=',')
+        np.savetxt(weights_filename, weights.reshape(-1, 1), delimiter=',')
     def_datasets += f"""
-
-
+X = copy(transpose(readdlm("{_escape_filename(X_filename)}", ',', Float32, '\\n')))
+y = readdlm("{_escape_filename(y_filename)}", ',', Float32, '\\n')[:, 1]"""
     if weights is not None:
         def_datasets += f"""
-
+weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')[:, 1]"""
     return def_datasets

-
 def _make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators, constraints_str,
                                 def_hyperparams, equation_file, fast_cycle, fractionReplacedHof, hofMigration,
-
+                                maxdepth, maxsize, migration, nrestarts, npop,
                                 parsimony, perturbationFactor, populations, procs, shouldOptimizeConstants,
-                                unary_operators, useFrequency, use_custom_variable_names,
+                                unary_operators, useFrequency, use_custom_variable_names,
+                                variable_names, warmupMaxsize, weightAddNode,
+                                ncyclesperiteration, fractionReplaced, topn, verbosity,
                                 weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
                                 weightMutateOperator, weightRandomize, weightSimplify, weights, **kwargs):
-
+    def tuple_fix(ops):
+        if len(ops) > 1:
+            return ', '.join(ops)
+        elif len(ops) == 0:
+            return ''
+        else:
+            return ops[0] + ','
+
+    def_hyperparams += f"""\n
+plus=(+)
+sub=(-)
+mult=(*)
+square=SymbolicRegression.square
+cube=SymbolicRegression.cube
+pow=(^)
+div=(/)
+logm=SymbolicRegression.logm
+logm2=SymbolicRegression.logm2
+logm10=SymbolicRegression.logm10
+sqrtm=SymbolicRegression.sqrtm
+neg=SymbolicRegression.neg
+greater=SymbolicRegression.greater
+relu=SymbolicRegression.relu
+logical_or=SymbolicRegression.logical_or
+logical_and=SymbolicRegression.logical_and
+
+options = SymbolicRegression.Options(binary_operators={'(' + tuple_fix(binary_operators) + ')'},
+                                     unary_operators={'(' + tuple_fix(unary_operators) + ')'},
 {constraints_str}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    const annealing = {"true" if annealing else "false"}
-    const weighted = {"true" if weights is not None else "false"}
-    const batching = {"true" if batching else "false"}
-    const batchSize = {min([batchSize, len(X)]) if batching else len(X):d}
-    const useVarMap = {"true" if use_custom_variable_names else "false"}
-    const mutationWeights = [
+                                     parsimony={parsimony:f}f0,
+                                     alpha={alpha:f}f0,
+                                     maxsize={maxsize:d},
+                                     maxdepth={maxdepth:d},
+                                     fast_cycle={'true' if fast_cycle else 'false'},
+                                     migration={'true' if migration else 'false'},
+                                     hofMigration={'true' if hofMigration else 'false'},
+                                     fractionReplacedHof={fractionReplacedHof}f0,
+                                     shouldOptimizeConstants={'true' if shouldOptimizeConstants else 'false'},
+                                     hofFile="{equation_file}",
+                                     npopulations={populations:d},
+                                     nrestarts={nrestarts:d},
+                                     perturbationFactor={perturbationFactor:f}f0,
+                                     annealing={"true" if annealing else "false"},
+                                     batching={"true" if batching else "false"},
+                                     batchSize={min([batchSize, len(X)]) if batching else len(X):d},
+                                     mutationWeights=[
         {weightMutateConstant:f},
         {weightMutateOperator:f},
         {weightAddNode:f},
@@ -395,53 +435,25 @@ const mutationWeights = [
         {weightSimplify:f},
         {weightRandomize:f},
         {weightDoNothing:f}
-    ]
-
-
-
+                                     ],
+                                     warmupMaxsize={warmupMaxsize:d},
+                                     useFrequency={"true" if useFrequency else "false"},
+                                     npop={npop:d},
+                                     ncyclesperiteration={ncyclesperiteration:d},
+                                     fractionReplaced={fractionReplaced:f}f0,
+                                     topn={topn:d},
+                                     verbosity=round(Int32, {verbosity:f})
 """
-
-    if len(binary_operators) > 0:
-        op_runner += """
-@inline function BINOP!(x::Array{Float32, 1}, y::Array{Float32, 1}, i::Int, clen::Int)
-    if i === 1
-        @inbounds @simd for j=1:clen
-            x[j] = """f"{binary_operators[0]}""""(x[j], y[j])
-        end"""
-        for i in range(1, len(binary_operators)):
-            op_runner += f"""
-    elseif i === {i + 1}
-        @inbounds @simd for j=1:clen
-            x[j] = {binary_operators[i]}(x[j], y[j])
-        end"""
-        op_runner += """
-    end
-end"""
-    if len(unary_operators) > 0:
-        op_runner += """
-@inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
-    if i === 1
-        @inbounds @simd for j=1:clen
-            x[j] = """f"{unary_operators[0]}(x[j])""""
-        end"""
-        for i in range(1, len(unary_operators)):
-            op_runner += f"""
-    elseif i === {i + 1}
-        @inbounds @simd for j=1:clen
-            x[j] = {unary_operators[i]}(x[j])
-        end"""
-        op_runner += """
-    end
-end"""
-    def_hyperparams += op_runner
+
     if use_custom_variable_names:
-        def_hyperparams += f"""
-
+        def_hyperparams += f""",
+                                     varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
+    def_hyperparams += '\n)'
     return def_hyperparams


 def _make_constraints_str(binary_operators, constraints, unary_operators, **kwargs):
-    constraints_str = "
+    constraints_str = "una_constraints = ["
     first = True
     for op in unary_operators:
         val = constraints[op]
@@ -449,8 +461,8 @@ def _make_constraints_str(binary_operators, constraints, unary_operators, **kwar
             constraints_str += ", "
         constraints_str += f"{val:d}"
         first = False
-    constraints_str += """]
-
+    constraints_str += """],
+bin_constraints = ["""
     first = True
     for op in binary_operators:
         tup = constraints[op]
@@ -458,7 +470,7 @@ const bin_constraints = ["""
             constraints_str += ", "
         constraints_str += f"({tup[0]:d}, {tup[1]:d})"
         first = False
-    constraints_str += "]"
+    constraints_str += "],"
     return constraints_str


@@ -481,7 +493,7 @@ def _handle_constraints(binary_operators, constraints, unary_operators, **kwargs
             constraints[op][0], constraints[op][1] = constraints[op][1], constraints[op][0]


-def
+def _create_inline_operators(binary_operators, unary_operators, **kwargs):
     def_hyperparams = ""
     for op_list in [binary_operators, unary_operators]:
         for i in range(len(op_list)):
@@ -529,35 +541,20 @@ def _handle_feature_selection(X, select_k_features, use_custom_variable_names, v

 def _set_paths(tempdir):
     # System-independent paths
-    pkg_directory = Path(__file__).parents[1]
-
-    operator_filename = pkg_directory / "Operators.jl"
-    julia_auxiliaries = [
-        "Equation.jl", "ProgramConstants.jl",
-        "LossFunctions.jl", "Utils.jl", "EvaluateEquation.jl",
-        "MutationFunctions.jl", "SimplifyEquation.jl", "PopMember.jl",
-        "HallOfFame.jl", "CheckConstraints.jl", "Mutate.jl",
-        "Population.jl", "RegularizedEvolution.jl", "SingleIteration.jl",
-        "ConstantOptimization.jl"
-    ]
-    julia_auxiliary_filenames = [
-        pkg_directory / fname
-        for fname in julia_auxiliaries
-    ]
-
+    pkg_directory = Path(__file__).parents[1]
+    default_project_file = pkg_directory / "Project.toml"
     tmpdir = Path(tempfile.mkdtemp(dir=tempdir))
     hyperparam_filename = tmpdir / f'hyperparams.jl'
     dataset_filename = tmpdir / f'dataset.jl'
-    auxiliary_filename = tmpdir / f'auxiliary.jl'
     runfile_filename = tmpdir / f'runfile.jl'
     X_filename = tmpdir / "X.csv"
     y_filename = tmpdir / "y.csv"
     weights_filename = tmpdir / "weights.csv"
-    return dict(
+    return dict(pkg_directory=pkg_directory,
+                default_project_file=default_project_file,
+                X_filename=X_filename,
                 dataset_filename=dataset_filename,
                 hyperparam_filename=hyperparam_filename,
-                julia_auxiliary_filenames=julia_auxiliary_filenames,
-                operator_filename=operator_filename, pkg_filename=pkg_filename,
                 runfile_filename=runfile_filename, tmpdir=tmpdir,
                 weights_filename=weights_filename, y_filename=y_filename)

@@ -575,13 +572,6 @@ def _check_assertions(X, binary_operators, unary_operators, use_custom_variable_
     assert len(variable_names) == X.shape[1]


-def _raise_depreciation_errors(limitPowComplexity, threads):
-    if threads is not None:
-        raise ValueError("The threads kwarg is deprecated. Use procs.")
-    if limitPowComplexity:
-        raise ValueError("The limitPowComplexity kwarg is deprecated. Use constraints.")
-
-
 def run_feature_selection(X, y, select_k_features):
     """Use a gradient boosting tree regressor as a proxy for finding
     the k most important features in X, returning indices for those
@@ -695,3 +685,15 @@ def _escape_filename(filename):
     repr = str(filename)
     repr = repr.replace('\\', '\\\\')
     return repr
+
+# https://gist.github.com/garrettdreyfus/8153571
+def _yesno(question):
+    """Simple Yes/No Function."""
+    prompt = f'{question} (y/n): '
+    ans = input(prompt).strip().lower()
+    if ans not in ['y', 'n']:
+        print(f'{ans} is invalid, please try again...')
+        return _yesno(question)
+    if ans == 'y':
+        return True
+    return False
|
setup.py
CHANGED
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:

 setuptools.setup(
     name="pysr", # Replace with your own username
-    version="0.
+    version="0.4.0",
     author="Miles Cranmer",
     author_email="[email protected]",
     description="Simple and efficient symbolic regression",
@@ -19,7 +19,7 @@ setuptools.setup(
     ],
     packages=setuptools.find_packages(),
     package_data={
-        'pysr': ['../
+        'pysr': ['../Project.toml']
     },
     include_package_data=False,
     classifiers=[
|
test/test.py
CHANGED
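The updated test below defines a custom unary operator inline as a Julia snippet and relaxes the MSE thresholds to 1e-4. A sketch of the same pattern in user code (operator and mapping names are illustrative; the `extra_sympy_mappings` key pairs with the operator's name so the returned equations can be parsed on the Python side):

```python
import numpy as np
from pysr import pysr

X = np.random.randn(100, 5)
y = X[:, 0] ** 2

# "sq(x) = x^2" is compiled by the Julia backend as a new unary operator.
equations = pysr(X, y,
                 unary_operators=["sq(x) = x^2"], binary_operators=["plus"],
                 extra_sympy_mappings={'sq': lambda x: x**2},
                 niterations=10)
assert equations.iloc[-1]['MSE'] < 1e-4
```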
@@ -7,16 +7,16 @@ y = X[:, 0]
 equations = pysr(X, y,
                  niterations=10)
 print(equations)
-assert equations.iloc[-1]['MSE'] < 1e-
+assert equations.iloc[-1]['MSE'] < 1e-4

 print("Test 2 - test custom operator")
 y = X[:, 0]**2
 equations = pysr(X, y,
-                 unary_operators=["
+                 unary_operators=["sq(x) = x^2"], binary_operators=["plus"],
                  extra_sympy_mappings={'square': lambda x: x**2},
                  niterations=10)
 print(equations)
-assert equations.iloc[-1]['MSE'] < 1e-
+assert equations.iloc[-1]['MSE'] < 1e-4

 X = np.random.randn(100, 1)
 y = X[:, 0] + 3.0
@@ -26,4 +26,4 @@ equations = pysr(X, y,
                  niterations=10)

 print(equations)
-assert equations.iloc[-1]['MSE'] < 1e-
+assert equations.iloc[-1]['MSE'] < 1e-4