MilesCranmer committed on
Commit
cedbbde
1 Parent(s): b18ab5a

Optimize hall of fame constants with LBFGS

Browse files
Files changed (3) hide show
  1. README.md +4 -4
  2. eureqa.jl +41 -15
  3. paralleleureqa.jl +12 -5
README.md CHANGED
@@ -77,10 +77,6 @@ weights = [8, 1, 1, 1, 0.1, 2]
77
 
78
  # TODO
79
 
80
- - [ ] Explicit constant optimization on hall-of-fame
81
- - Create method to find and return all constants, from left to right
82
- - Create method to find and set all constants, in same order
83
- - Pull up some optimization algorithm and add it. No need for gradients; that's a headache. Keep the package small!
84
  - [ ] Hyperparameter tune
85
  - [ ] Create a Python interface
86
  - [ ] Create a benchmark for accuracy
@@ -94,6 +90,10 @@ weights = [8, 1, 1, 1, 0.1, 2]
94
  - Seems like its necessary right now. But still by far the slowest option.
95
  - [ ] Calculating the loss function - there is duplicate calculations happening.
96
  - [ ] Declaration of the weights array every iteration
 
 
 
 
97
  - [x] Create a benchmark for speed
98
  - [x] Simplify subtrees with only constants beneath them. Or should I? Maybe randomly simplify sometimes?
99
  - [x] Record hall of fame
 
77
 
78
  # TODO
79
 
 
 
 
 
80
  - [ ] Hyperparameter tune
81
  - [ ] Create a Python interface
82
  - [ ] Create a benchmark for accuracy
 
90
  - Seems like its necessary right now. But still by far the slowest option.
91
  - [ ] Calculating the loss function - there is duplicate calculations happening.
92
  - [ ] Declaration of the weights array every iteration
93
+ - [x] Explicit constant optimization on hall-of-fame
94
+ - Create method to find and return all constants, from left to right
95
+ - Create method to find and set all constants, in same order
96
+ - Pull up some optimization algorithm and add it. Keep the package small!
97
  - [x] Create a benchmark for speed
98
  - [x] Simplify subtrees with only constants beneath them. Or should I? Maybe randomly simplify sometimes?
99
  - [x] Record hall of fame
eureqa.jl CHANGED
@@ -1,5 +1,6 @@
1
  include("hyperparams.jl")
2
  include("dataset.jl")
 
3
 
4
  const maxdegree = 2
5
  const actualMaxsize = maxsize + maxdegree
@@ -14,6 +15,10 @@ function debug(verbosity, string...)
14
  verbosity > 0 ? println(string...) : nothing
15
  end
16
 
 
 
 
 
17
  # Define a serialization format for the symbolic equations:
18
  mutable struct Node
19
  #Holds operators, variables, constants in a tree
@@ -222,9 +227,7 @@ function MSE(x::Array{Float32}, y::Array{Float32})::Float32
222
  end
223
 
224
  # Score an equation
225
- function scoreFunc(
226
- tree::Node;
227
- parsimony::Float32=0.1f0)::Float32
228
  try
229
  return MSE(evalTreeArray(tree), y) + countNodes(tree)*parsimony
230
  catch error
@@ -323,8 +326,7 @@ end
323
  # exp(-delta/T) defines probability of accepting a change
324
  function iterate(
325
  tree::Node, T::Float32,
326
- alpha::Float32=1.0f0,
327
- mult::Float32=0.1f0;
328
  annealing::Bool=true
329
  )::Node
330
  prev = tree
@@ -353,8 +355,8 @@ function iterate(
353
  end
354
 
355
  if annealing
356
- beforeLoss = scoreFunc(prev, parsimony=mult)
357
- afterLoss = scoreFunc(tree, parsimony=mult)
358
  delta = afterLoss - beforeLoss
359
  probChange = exp(-delta/(T*alpha))
360
 
@@ -382,8 +384,8 @@ mutable struct PopMember
382
  score::Float32
383
  birth::Int32
384
 
385
- PopMember(t::Node) = new(t, scoreFunc(t, parsimony=parsimony), round(Int32, 1e3*(time()-1.6e9)))
386
- PopMember(t::Node, score::Float32) = new(t, score, round(Int32, 1e3*(time()-1.6e9)))
387
 
388
  end
389
 
@@ -413,9 +415,9 @@ function bestOfSample(pop::Population)::PopMember
413
  end
414
 
415
  # Return best 10 examples
416
- function bestSubPop(pop::Population)::Population
417
  best_idx = sortperm([pop.members[member].score for member=1:pop.n])
418
- return Population(pop.members[best_idx[1:10]])
419
  end
420
 
421
  # Mutate the best sampled member of the population
@@ -425,10 +427,10 @@ function iterateSample(
425
  allstar = bestOfSample(pop)
426
  new = iterate(
427
  allstar.tree, T,
428
- alpha, parsimony, annealing=annealing)
429
  allstar.tree = new
430
- allstar.score = scoreFunc(new, parsimony=parsimony)
431
- allstar.birth = round(Int32, 1e3*(time()-1.6e9))
432
  return allstar
433
  end
434
 
@@ -503,9 +505,33 @@ function setConstants(tree::Node, constants::Array{Float32, 1})
503
  end
504
  end
505
 
506
- # Does nothing currently
 
 
 
 
 
 
 
507
  function optimizeConstants(member::PopMember)::PopMember
 
 
 
 
508
  x0 = getConstants(member.tree)
 
 
 
 
 
 
 
 
 
 
 
 
 
509
  return member
510
  end
511
 
 
1
  include("hyperparams.jl")
2
  include("dataset.jl")
3
+ import Optim
4
 
5
  const maxdegree = 2
6
  const actualMaxsize = maxsize + maxdegree
 
15
  verbosity > 0 ? println(string...) : nothing
16
  end
17
 
18
# Return a birth timestamp for a population member: milliseconds elapsed
# since the fixed epoch offset 1.6e9 s (offset keeps the value small enough
# to fit in an Int32).
function giveBirth()::Int32
    elapsed_ms = 1e3 * (time() - 1.6e9)
    return round(Int32, elapsed_ms)
end
21
+
22
  # Define a serialization format for the symbolic equations:
23
  mutable struct Node
24
  #Holds operators, variables, constants in a tree
 
227
  end
228
 
229
  # Score an equation
230
+ function scoreFunc(tree::Node)::Float32
 
 
231
  try
232
  return MSE(evalTreeArray(tree), y) + countNodes(tree)*parsimony
233
  catch error
 
326
  # exp(-delta/T) defines probability of accepting a change
327
  function iterate(
328
  tree::Node, T::Float32,
329
+ alpha::Float32=1.0f0;
 
330
  annealing::Bool=true
331
  )::Node
332
  prev = tree
 
355
  end
356
 
357
  if annealing
358
+ beforeLoss = scoreFunc(prev)
359
+ afterLoss = scoreFunc(tree)
360
  delta = afterLoss - beforeLoss
361
  probChange = exp(-delta/(T*alpha))
362
 
 
384
  score::Float32
385
  birth::Int32
386
 
387
+ PopMember(t::Node) = new(t, scoreFunc(t), giveBirth())
388
+ PopMember(t::Node, score::Float32) = new(t, score, giveBirth())
389
 
390
  end
391
 
 
415
  end
416
 
417
  # Return best 10 examples
418
+ function bestSubPop(pop::Population; topn::Int32=10)::Population
419
  best_idx = sortperm([pop.members[member].score for member=1:pop.n])
420
+ return Population(pop.members[best_idx[1:topn]])
421
  end
422
 
423
  # Mutate the best sampled member of the population
 
427
  allstar = bestOfSample(pop)
428
  new = iterate(
429
  allstar.tree, T,
430
+ alpha, annealing=annealing)
431
  allstar.tree = new
432
+ allstar.score = scoreFunc(new)
433
+ allstar.birth = giveBirth()
434
  return allstar
435
  end
436
 
 
505
  end
506
  end
507
 
508
+
509
# Proxy objective for the constant optimizer: write the candidate constant
# vector `x` into `tree` (mutating it), then report the tree's score.
function optFunc(x::Array{Float32, 1}, tree::Node)::Float32
    setConstants(tree, x)
    score = scoreFunc(tree)
    return score
end
514
+
515
# Optimize the constants of an equation in place and return the member.
#
# Fix: the original comment claimed Nelder-Mead, but the code actually uses
# Newton's method for a single constant and LBFGS for several (matching the
# commit title). Both are run gradient-free via Optim's finite differencing,
# capped at a small iteration budget. Also replaced the non-idiomatic
# `size(x0)[1]` with `length(x0)`.
#
# If the optimizer converges, the tree keeps the optimized constants and the
# member's score/birth are refreshed; otherwise the original constants are
# restored (evaluation during optimization mutates the tree).
function optimizeConstants(member::PopMember)::PopMember
    nconst = countConstants(member.tree)
    # Nothing to do for constant-free trees.
    if nconst == 0
        return member
    end
    x0 = getConstants(member.tree)
    f(x::Array{Float32,1})::Float32 = optFunc(x, member.tree)
    if length(x0) == 1
        result = Optim.optimize(f, x0, Optim.Newton(), Optim.Options(iterations=20))
    else
        result = Optim.optimize(f, x0, Optim.LBFGS(), Optim.Options(iterations=10))
    end
    if Optim.converged(result)
        setConstants(member.tree, result.minimizer)
        member.score = convert(Float32, result.minimum)
        member.birth = giveBirth()
    else
        # Roll back: optimization left arbitrary constants in the tree.
        setConstants(member.tree, x0)
    end
    return member
end
537
 
paralleleureqa.jl CHANGED
@@ -1,4 +1,5 @@
1
  include("eureqa.jl")
 
2
 
3
  const nthreads = Threads.nthreads()
4
 
@@ -6,10 +7,9 @@ const nthreads = Threads.nthreads()
6
  mutable struct HallOfFame
7
  members::Array{PopMember, 1}
8
  exists::Array{Bool, 1} #Whether it has been set
9
- optimized::Array{Bool, 1} #Whether the constants have been optimized
10
 
11
  # Arranged by complexity - store one at each.
12
- HallOfFame() = new([PopMember(Node(1f0), 1f9) for i=1:actualMaxsize], [false for i=1:actualMaxsize], [false for i=1:actualMaxsize])
13
  end
14
 
15
 
@@ -19,11 +19,13 @@ function fullRun(niterations::Integer;
19
  ncyclesperiteration::Integer=3000,
20
  fractionReplaced::Float32=0.1f0,
21
  verbosity::Integer=0,
 
22
  )
23
  debug(verbosity, "Lets try to learn (x2^2 + cos(x3)) using regularized evolution from scratch")
24
  debug(verbosity, "Running with $nthreads threads")
25
  # Generate random initial populations
26
  allPops = [Population(npop, 3) for j=1:nthreads]
 
27
  # Repeat this many evolutions; we collect and migrate the best
28
  # each time.
29
  hallOfFame = HallOfFame()
@@ -32,16 +34,22 @@ function fullRun(niterations::Integer;
32
  # Spawn threads to run independent evolutions, then gather them
33
  @inbounds Threads.@threads for i=1:nthreads
34
  allPops[i] = run(allPops[i], ncyclesperiteration, annealing, verbosity=verbosity)
 
 
 
 
35
  end
36
 
37
  # Get best 10 models from each evolution. Copy because we re-assign later.
38
- bestPops = deepcopy(Population([member for pop in allPops for member in bestSubPop(pop).members]))
 
39
 
40
  #Update hall of fame
41
  for member in bestPops.members
42
  size = countNodes(member.tree)
43
  if member.score < hallOfFame.members[size].score
44
  hallOfFame.members[size] = deepcopy(member)
 
45
  hallOfFame.exists[size] = true
46
  end
47
  end
@@ -57,7 +65,6 @@ function fullRun(niterations::Integer;
57
  betterThanAllSmaller = (numberSmallerAndBetter == 0)
58
  if betterThanAllSmaller
59
  debug(verbosity, "$size \t $(member.score-parsimony*size) \t $(stringTree(member.tree))")
60
- member = optimizeConstants(member)
61
  push!(dominating, member)
62
  end
63
  end
@@ -79,7 +86,7 @@ function fullRun(niterations::Integer;
79
  for j=1:nthreads
80
  for k in rand(1:npop, Integer(npop*fractionReplacedHof))
81
  # Copy in case one gets used twice
82
- allPops[j].members[k] = deepcopy(dominating[rand(1:size(dominating)[1])])
83
  end
84
  end
85
  end
 
1
  include("eureqa.jl")
2
+ import Optim
3
 
4
  const nthreads = Threads.nthreads()
5
 
 
7
# Hall of fame: stores the best equation found at each complexity level.
mutable struct HallOfFame
    members::Array{PopMember, 1}
    exists::Array{Bool, 1} #Whether it has been set

    # Arranged by complexity - store one at each.
    # Every slot is seeded with a trivial constant tree at an effectively
    # infinite score (1f9) so that any real member immediately replaces it.
    function HallOfFame()
        seeds = [PopMember(Node(1f0), 1f9) for slot=1:actualMaxsize]
        return new(seeds, fill(false, actualMaxsize))
    end
end
14
 
15
 
 
19
  ncyclesperiteration::Integer=3000,
20
  fractionReplaced::Float32=0.1f0,
21
  verbosity::Integer=0,
22
+ topn::Int32=10
23
  )
24
  debug(verbosity, "Lets try to learn (x2^2 + cos(x3)) using regularized evolution from scratch")
25
  debug(verbosity, "Running with $nthreads threads")
26
  # Generate random initial populations
27
  allPops = [Population(npop, 3) for j=1:nthreads]
28
+ bestSubPops = [Population(1) for j=1:nthreads]
29
  # Repeat this many evolutions; we collect and migrate the best
30
  # each time.
31
  hallOfFame = HallOfFame()
 
34
  # Spawn threads to run independent evolutions, then gather them
35
  @inbounds Threads.@threads for i=1:nthreads
36
  allPops[i] = run(allPops[i], ncyclesperiteration, annealing, verbosity=verbosity)
37
+ bestSubPops[i] = bestSubPop(allPops[i], topn=topn)
38
+ for j=1:bestSubPops[i].n
39
+ bestSubPops[i].members[j] = optimizeConstants(bestSubPops[i].members[j])
40
+ end
41
  end
42
 
43
  # Get best 10 models from each evolution. Copy because we re-assign later.
44
+ # bestPops = deepcopy(Population([member for pop in allPops for member in bestSubPop(pop).members]))
45
+ bestPops = deepcopy(Population([member for pop in bestSubPops for member in pop.members]))
46
 
47
  #Update hall of fame
48
  for member in bestPops.members
49
  size = countNodes(member.tree)
50
  if member.score < hallOfFame.members[size].score
51
  hallOfFame.members[size] = deepcopy(member)
52
+ #hallOfFame.members[size] = optimizeConstants(hallOfFame.members[size])
53
  hallOfFame.exists[size] = true
54
  end
55
  end
 
65
  betterThanAllSmaller = (numberSmallerAndBetter == 0)
66
  if betterThanAllSmaller
67
  debug(verbosity, "$size \t $(member.score-parsimony*size) \t $(stringTree(member.tree))")
 
68
  push!(dominating, member)
69
  end
70
  end
 
86
  for j=1:nthreads
87
  for k in rand(1:npop, Integer(npop*fractionReplacedHof))
88
  # Copy in case one gets used twice
89
+ allPops[j].members[k] = deepcopy(dominating[rand(2:size(dominating)[1])])
90
  end
91
  end
92
  end