Spaces:
Running
Running
MilesCranmer
committed on
Commit
·
cedbbde
1
Parent(s):
b18ab5a
Optimize hall of fame constants with LBFGS
Browse files- README.md +4 -4
- eureqa.jl +41 -15
- paralleleureqa.jl +12 -5
README.md
CHANGED
@@ -77,10 +77,6 @@ weights = [8, 1, 1, 1, 0.1, 2]
|
|
77 |
|
78 |
# TODO
|
79 |
|
80 |
-
- [ ] Explicit constant optimization on hall-of-fame
|
81 |
-
- Create method to find and return all constants, from left to right
|
82 |
-
- Create method to find and set all constants, in same order
|
83 |
-
- Pull up some optimization algorithm and add it. No need for gradients; that's a headache. Keep the package small!
|
84 |
- [ ] Hyperparameter tune
|
85 |
- [ ] Create a Python interface
|
86 |
- [ ] Create a benchmark for accuracy
|
@@ -94,6 +90,10 @@ weights = [8, 1, 1, 1, 0.1, 2]
|
|
94 |
- Seems like it's necessary right now. But still by far the slowest option.
|
95 |
- [ ] Calculating the loss function - there are duplicate calculations happening.
|
96 |
- [ ] Declaration of the weights array every iteration
|
|
|
|
|
|
|
|
|
97 |
- [x] Create a benchmark for speed
|
98 |
- [x] Simplify subtrees with only constants beneath them. Or should I? Maybe randomly simplify sometimes?
|
99 |
- [x] Record hall of fame
|
|
|
77 |
|
78 |
# TODO
|
79 |
|
|
|
|
|
|
|
|
|
80 |
- [ ] Hyperparameter tune
|
81 |
- [ ] Create a Python interface
|
82 |
- [ ] Create a benchmark for accuracy
|
|
|
90 |
- Seems like it's necessary right now. But still by far the slowest option.
|
91 |
- [ ] Calculating the loss function - there are duplicate calculations happening.
|
92 |
- [ ] Declaration of the weights array every iteration
|
93 |
+
- [x] Explicit constant optimization on hall-of-fame
|
94 |
+
- Create method to find and return all constants, from left to right
|
95 |
+
- Create method to find and set all constants, in same order
|
96 |
+
- Pull up some optimization algorithm and add it. Keep the package small!
|
97 |
- [x] Create a benchmark for speed
|
98 |
- [x] Simplify subtrees with only constants beneath them. Or should I? Maybe randomly simplify sometimes?
|
99 |
- [x] Record hall of fame
|
eureqa.jl
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
include("hyperparams.jl")
|
2 |
include("dataset.jl")
|
|
|
3 |
|
4 |
const maxdegree = 2
|
5 |
const actualMaxsize = maxsize + maxdegree
|
@@ -14,6 +15,10 @@ function debug(verbosity, string...)
|
|
14 |
verbosity > 0 ? println(string...) : nothing
|
15 |
end
|
16 |
|
|
|
|
|
|
|
|
|
17 |
# Define a serialization format for the symbolic equations:
|
18 |
mutable struct Node
|
19 |
#Holds operators, variables, constants in a tree
|
@@ -222,9 +227,7 @@ function MSE(x::Array{Float32}, y::Array{Float32})::Float32
|
|
222 |
end
|
223 |
|
224 |
# Score an equation
|
225 |
-
function scoreFunc(
|
226 |
-
tree::Node;
|
227 |
-
parsimony::Float32=0.1f0)::Float32
|
228 |
try
|
229 |
return MSE(evalTreeArray(tree), y) + countNodes(tree)*parsimony
|
230 |
catch error
|
@@ -323,8 +326,7 @@ end
|
|
323 |
# exp(-delta/T) defines probability of accepting a change
|
324 |
function iterate(
|
325 |
tree::Node, T::Float32,
|
326 |
-
alpha::Float32=1.0f0
|
327 |
-
mult::Float32=0.1f0;
|
328 |
annealing::Bool=true
|
329 |
)::Node
|
330 |
prev = tree
|
@@ -353,8 +355,8 @@ function iterate(
|
|
353 |
end
|
354 |
|
355 |
if annealing
|
356 |
-
beforeLoss = scoreFunc(prev
|
357 |
-
afterLoss = scoreFunc(tree
|
358 |
delta = afterLoss - beforeLoss
|
359 |
probChange = exp(-delta/(T*alpha))
|
360 |
|
@@ -382,8 +384,8 @@ mutable struct PopMember
|
|
382 |
score::Float32
|
383 |
birth::Int32
|
384 |
|
385 |
-
PopMember(t::Node) = new(t, scoreFunc(t
|
386 |
-
PopMember(t::Node, score::Float32) = new(t, score,
|
387 |
|
388 |
end
|
389 |
|
@@ -413,9 +415,9 @@ function bestOfSample(pop::Population)::PopMember
|
|
413 |
end
|
414 |
|
415 |
# Return best 10 examples
|
416 |
-
function bestSubPop(pop::Population)::Population
|
417 |
best_idx = sortperm([pop.members[member].score for member=1:pop.n])
|
418 |
-
return Population(pop.members[best_idx[1:
|
419 |
end
|
420 |
|
421 |
# Mutate the best sampled member of the population
|
@@ -425,10 +427,10 @@ function iterateSample(
|
|
425 |
allstar = bestOfSample(pop)
|
426 |
new = iterate(
|
427 |
allstar.tree, T,
|
428 |
-
alpha,
|
429 |
allstar.tree = new
|
430 |
-
allstar.score = scoreFunc(new
|
431 |
-
allstar.birth =
|
432 |
return allstar
|
433 |
end
|
434 |
|
@@ -503,9 +505,33 @@ function setConstants(tree::Node, constants::Array{Float32, 1})
|
|
503 |
end
|
504 |
end
|
505 |
|
506 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
507 |
function optimizeConstants(member::PopMember)::PopMember
|
|
|
|
|
|
|
|
|
508 |
x0 = getConstants(member.tree)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
509 |
return member
|
510 |
end
|
511 |
|
|
|
1 |
include("hyperparams.jl")
|
2 |
include("dataset.jl")
|
3 |
+
import Optim
|
4 |
|
5 |
const maxdegree = 2
|
6 |
const actualMaxsize = maxsize + maxdegree
|
|
|
15 |
verbosity > 0 ? println(string...) : nothing
|
16 |
end
|
17 |
|
18 |
+
"""
    giveBirth()::Int32

Return a millisecond-resolution timestamp to be stored as a member's `birth`.

The value is the number of milliseconds elapsed since Unix time `1.6e9`,
reduced modulo `2^31` so that it always fits in an `Int32`. Converting the raw
elapsed-millisecond count straight to `Int32` overflows (and throws
`InexactError`) roughly 24.8 days after that reference instant, so the count is
rounded in `Int64` first and then wrapped.

NOTE(review): because of the wrap, values restart from 0 about every 24.8
days; any code comparing births across such a boundary will see a newer
member as "older". Confirm this is acceptable for the callers, or widen the
`birth` field instead.
"""
function giveBirth()::Int32
    # Round in 64 bits: the raw millisecond count no longer fits in 32 bits.
    elapsed_ms = round(Int64, 1e3 * (time() - 1.6e9))
    # mod with a positive modulus yields 0:typemax(Int32), always convertible.
    return Int32(mod(elapsed_ms, Int64(typemax(Int32)) + 1))
end
|
21 |
+
|
22 |
# Define a serialization format for the symbolic equations:
|
23 |
mutable struct Node
|
24 |
#Holds operators, variables, constants in a tree
|
|
|
227 |
end
|
228 |
|
229 |
# Score an equation
|
230 |
+
function scoreFunc(tree::Node)::Float32
|
|
|
|
|
231 |
try
|
232 |
return MSE(evalTreeArray(tree), y) + countNodes(tree)*parsimony
|
233 |
catch error
|
|
|
326 |
# exp(-delta/T) defines probability of accepting a change
|
327 |
function iterate(
|
328 |
tree::Node, T::Float32,
|
329 |
+
alpha::Float32=1.0f0;
|
|
|
330 |
annealing::Bool=true
|
331 |
)::Node
|
332 |
prev = tree
|
|
|
355 |
end
|
356 |
|
357 |
if annealing
|
358 |
+
beforeLoss = scoreFunc(prev)
|
359 |
+
afterLoss = scoreFunc(tree)
|
360 |
delta = afterLoss - beforeLoss
|
361 |
probChange = exp(-delta/(T*alpha))
|
362 |
|
|
|
384 |
score::Float32
|
385 |
birth::Int32
|
386 |
|
387 |
+
PopMember(t::Node) = new(t, scoreFunc(t), giveBirth())
|
388 |
+
PopMember(t::Node, score::Float32) = new(t, score, giveBirth())
|
389 |
|
390 |
end
|
391 |
|
|
|
415 |
end
|
416 |
|
417 |
# Return the best `topn` examples (10 by default)
|
418 |
+
"""
    bestSubPop(pop::Population; topn::Integer=10)::Population

Build a `Population` from the `topn` lowest-scoring members of `pop`.

# Keywords
- `topn`: how many members to keep. Any `Integer` is accepted; annotating the
  keyword as `Int32` with the literal default `10` made the default itself
  fail the type check, so the annotation is widened. If `topn` exceeds
  `pop.n`, all `pop.n` members are returned instead of raising `BoundsError`.

The selected member objects are handed to the `Population` constructor as-is;
no copy is made in this function.
"""
function bestSubPop(pop::Population; topn::Integer=10)::Population
    # Rank the first pop.n members by ascending score (lower is better).
    scores = [pop.members[i].score for i in 1:pop.n]
    best_idx = sortperm(scores)
    # Never ask for more members than the population holds.
    nkeep = min(topn, pop.n)
    return Population(pop.members[best_idx[1:nkeep]])
end
|
422 |
|
423 |
# Mutate the best sampled member of the population
|
|
|
427 |
allstar = bestOfSample(pop)
|
428 |
new = iterate(
|
429 |
allstar.tree, T,
|
430 |
+
alpha, annealing=annealing)
|
431 |
allstar.tree = new
|
432 |
+
allstar.score = scoreFunc(new)
|
433 |
+
allstar.birth = giveBirth()
|
434 |
return allstar
|
435 |
end
|
436 |
|
|
|
505 |
end
|
506 |
end
|
507 |
|
508 |
+
|
509 |
+
# Proxy function for optimization
|
510 |
+
"""
    optFunc(x::Array{Float32, 1}, tree::Node)::Float32

Objective used when optimizing constants: write the candidate constants `x`
into `tree` via `setConstants`, then return `scoreFunc(tree)`.

Note that `tree` is modified by the `setConstants` call and keeps the values
from `x` after this function returns.
"""
function optFunc(x::Array{Float32, 1}, tree::Node)::Float32
    setConstants(tree, x)
    loss = scoreFunc(tree)
    return loss
end
|
514 |
+
|
515 |
+
# Use Newton's method (one constant) or L-BFGS (several constants) to optimize the constants in an equation
|
516 |
"""
    optimizeConstants(member::PopMember)::PopMember

Try to lower `member`'s score by tuning the constants in `member.tree`, and
return the same `member` object.

Behavior:
- If `countConstants(member.tree)` is zero, `member` is returned untouched.
- With exactly one constant, `Optim.Newton()` is run for at most 20
  iterations; otherwise `Optim.LBFGS()` is run for at most 10 iterations.
  No gradient is supplied here, only the objective.
- If the optimizer reports convergence, the minimizer is written into the
  tree, `member.score` is set to the reported minimum, and `member.birth`
  is refreshed with `giveBirth()`.
- Otherwise the original constants are written back, and `score` and `birth`
  are left unchanged.
"""
function optimizeConstants(member::PopMember)::PopMember
    nconst = countConstants(member.tree)
    if nconst == 0
        return member
    end
    x0 = getConstants(member.tree)
    # The objective writes each trial point into member.tree (see optFunc), so
    # the tree's constants are whatever point was evaluated last until one of
    # the setConstants calls below fixes them.
    f(x::Array{Float32,1})::Float32 = optFunc(x, member.tree)
    if length(x0) == 1
        result = Optim.optimize(f, x0, Optim.Newton(), Optim.Options(iterations=20))
    else
        result = Optim.optimize(f, x0, Optim.LBFGS(), Optim.Options(iterations=10))
    end
    if Optim.converged(result)
        # Use Optim's accessor functions rather than reaching into the result's
        # fields directly.
        setConstants(member.tree, Optim.minimizer(result))
        member.score = convert(Float32, Optim.minimum(result))
        member.birth = giveBirth()
    else
        # Undo the trial values left in the tree by the objective.
        setConstants(member.tree, x0)
    end
    return member
end
|
537 |
|
paralleleureqa.jl
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
include("eureqa.jl")
|
|
|
2 |
|
3 |
const nthreads = Threads.nthreads()
|
4 |
|
@@ -6,10 +7,9 @@ const nthreads = Threads.nthreads()
|
|
6 |
mutable struct HallOfFame
|
7 |
members::Array{PopMember, 1}
|
8 |
exists::Array{Bool, 1} #Whether it has been set
|
9 |
-
optimized::Array{Bool, 1} #Whether the constants have been optimized
|
10 |
|
11 |
# Arranged by complexity - store one at each.
|
12 |
-
HallOfFame() = new([PopMember(Node(1f0), 1f9) for i=1:actualMaxsize], [false for i=1:actualMaxsize]
|
13 |
end
|
14 |
|
15 |
|
@@ -19,11 +19,13 @@ function fullRun(niterations::Integer;
|
|
19 |
ncyclesperiteration::Integer=3000,
|
20 |
fractionReplaced::Float32=0.1f0,
|
21 |
verbosity::Integer=0,
|
|
|
22 |
)
|
23 |
debug(verbosity, "Lets try to learn (x2^2 + cos(x3)) using regularized evolution from scratch")
|
24 |
debug(verbosity, "Running with $nthreads threads")
|
25 |
# Generate random initial populations
|
26 |
allPops = [Population(npop, 3) for j=1:nthreads]
|
|
|
27 |
# Repeat this many evolutions; we collect and migrate the best
|
28 |
# each time.
|
29 |
hallOfFame = HallOfFame()
|
@@ -32,16 +34,22 @@ function fullRun(niterations::Integer;
|
|
32 |
# Spawn threads to run independent evolutions, then gather them
|
33 |
@inbounds Threads.@threads for i=1:nthreads
|
34 |
allPops[i] = run(allPops[i], ncyclesperiteration, annealing, verbosity=verbosity)
|
|
|
|
|
|
|
|
|
35 |
end
|
36 |
|
37 |
# Get best 10 models from each evolution. Copy because we re-assign later.
|
38 |
-
bestPops = deepcopy(Population([member for pop in allPops for member in bestSubPop(pop).members]))
|
|
|
39 |
|
40 |
#Update hall of fame
|
41 |
for member in bestPops.members
|
42 |
size = countNodes(member.tree)
|
43 |
if member.score < hallOfFame.members[size].score
|
44 |
hallOfFame.members[size] = deepcopy(member)
|
|
|
45 |
hallOfFame.exists[size] = true
|
46 |
end
|
47 |
end
|
@@ -57,7 +65,6 @@ function fullRun(niterations::Integer;
|
|
57 |
betterThanAllSmaller = (numberSmallerAndBetter == 0)
|
58 |
if betterThanAllSmaller
|
59 |
debug(verbosity, "$size \t $(member.score-parsimony*size) \t $(stringTree(member.tree))")
|
60 |
-
member = optimizeConstants(member)
|
61 |
push!(dominating, member)
|
62 |
end
|
63 |
end
|
@@ -79,7 +86,7 @@ function fullRun(niterations::Integer;
|
|
79 |
for j=1:nthreads
|
80 |
for k in rand(1:npop, Integer(npop*fractionReplacedHof))
|
81 |
# Copy in case one gets used twice
|
82 |
-
allPops[j].members[k] = deepcopy(dominating[rand(
|
83 |
end
|
84 |
end
|
85 |
end
|
|
|
1 |
include("eureqa.jl")
|
2 |
+
import Optim
|
3 |
|
4 |
const nthreads = Threads.nthreads()
|
5 |
|
|
|
7 |
# Best member recorded for each equation size.
mutable struct HallOfFame
    # One slot per size, from 1 up to actualMaxsize.
    members::Array{PopMember, 1}
    # exists[i] is true once slot i has been set.
    exists::Array{Bool, 1}

    # Arranged by complexity - store one at each.
    # Every slot starts as a placeholder constant tree with score 1f9 and is
    # flagged as not yet set.
    function HallOfFame()
        placeholders = [PopMember(Node(1f0), 1f9) for _ in 1:actualMaxsize]
        unset = fill(false, actualMaxsize)
        return new(placeholders, unset)
    end
end
|
14 |
|
15 |
|
|
|
19 |
ncyclesperiteration::Integer=3000,
|
20 |
fractionReplaced::Float32=0.1f0,
|
21 |
verbosity::Integer=0,
|
22 |
+
topn::Int32=10
|
23 |
)
|
24 |
debug(verbosity, "Lets try to learn (x2^2 + cos(x3)) using regularized evolution from scratch")
|
25 |
debug(verbosity, "Running with $nthreads threads")
|
26 |
# Generate random initial populations
|
27 |
allPops = [Population(npop, 3) for j=1:nthreads]
|
28 |
+
bestSubPops = [Population(1) for j=1:nthreads]
|
29 |
# Repeat this many evolutions; we collect and migrate the best
|
30 |
# each time.
|
31 |
hallOfFame = HallOfFame()
|
|
|
34 |
# Spawn threads to run independent evolutions, then gather them
|
35 |
@inbounds Threads.@threads for i=1:nthreads
|
36 |
allPops[i] = run(allPops[i], ncyclesperiteration, annealing, verbosity=verbosity)
|
37 |
+
bestSubPops[i] = bestSubPop(allPops[i], topn=topn)
|
38 |
+
for j=1:bestSubPops[i].n
|
39 |
+
bestSubPops[i].members[j] = optimizeConstants(bestSubPops[i].members[j])
|
40 |
+
end
|
41 |
end
|
42 |
|
43 |
# Get best 10 models from each evolution. Copy because we re-assign later.
|
44 |
+
# bestPops = deepcopy(Population([member for pop in allPops for member in bestSubPop(pop).members]))
|
45 |
+
bestPops = deepcopy(Population([member for pop in bestSubPops for member in pop.members]))
|
46 |
|
47 |
#Update hall of fame
|
48 |
for member in bestPops.members
|
49 |
size = countNodes(member.tree)
|
50 |
if member.score < hallOfFame.members[size].score
|
51 |
hallOfFame.members[size] = deepcopy(member)
|
52 |
+
#hallOfFame.members[size] = optimizeConstants(hallOfFame.members[size])
|
53 |
hallOfFame.exists[size] = true
|
54 |
end
|
55 |
end
|
|
|
65 |
betterThanAllSmaller = (numberSmallerAndBetter == 0)
|
66 |
if betterThanAllSmaller
|
67 |
debug(verbosity, "$size \t $(member.score-parsimony*size) \t $(stringTree(member.tree))")
|
|
|
68 |
push!(dominating, member)
|
69 |
end
|
70 |
end
|
|
|
86 |
for j=1:nthreads
|
87 |
for k in rand(1:npop, Integer(npop*fractionReplacedHof))
|
88 |
# Copy in case one gets used twice
|
89 |
+
allPops[j].members[k] = deepcopy(dominating[rand(2:size(dominating)[1])])
|
90 |
end
|
91 |
end
|
92 |
end
|