Spaces:

MilesCranmer
/

PySR

Running

App Files Files Community

MilesCranmer committed on Sep 16, 2020

Commit

cedbbde

1 Parent(s): b18ab5a

Optimize hall of fame constants with LBFGS

Browse files

Files changed (3) hide show

README.md +4 -4
eureqa.jl +41 -15
paralleleureqa.jl +12 -5

README.md CHANGED Viewed

@@ -77,10 +77,6 @@ weights = [8, 1, 1, 1, 0.1, 2]
 # TODO
-- [ ] Explicit constant optimization on hall-of-fame
-    - Create method to find and return all constants, from left to right
-    - Create method to find and set all constants, in same order
-    - Pull up some optimization algorithm and add it. No need for gradients; that's a headache. Keep the package small!
 - [ ] Hyperparameter tune
 - [ ] Create a Python interface
 - [ ] Create a benchmark for accuracy
@@ -94,6 +90,10 @@ weights = [8, 1, 1, 1, 0.1, 2]
             - Seems like it's necessary right now. But still by far the slowest option.
         - [ ] Calculating the loss function - there are duplicate calculations happening.
         - [ ] Declaration of the weights array every iteration
 - [x] Create a benchmark for speed
 - [x] Simplify subtrees with only constants beneath them. Or should I? Maybe randomly simplify sometimes?
 - [x] Record hall of fame

 # TODO
 - [ ] Hyperparameter tune
 - [ ] Create a Python interface
 - [ ] Create a benchmark for accuracy
             - Seems like it's necessary right now. But still by far the slowest option.
         - [ ] Calculating the loss function - there are duplicate calculations happening.
         - [ ] Declaration of the weights array every iteration
+- [x] Explicit constant optimization on hall-of-fame
+    - Create method to find and return all constants, from left to right
+    - Create method to find and set all constants, in same order
+    - Pull up some optimization algorithm and add it. Keep the package small!
 - [x] Create a benchmark for speed
 - [x] Simplify subtrees with only constants beneath them. Or should I? Maybe randomly simplify sometimes?
 - [x] Record hall of fame

eureqa.jl CHANGED Viewed

@@ -1,5 +1,6 @@
 include("hyperparams.jl")
 include("dataset.jl")
 const maxdegree = 2
 const actualMaxsize = maxsize + maxdegree
@@ -14,6 +15,10 @@ function debug(verbosity, string...)
     verbosity > 0 ? println(string...) : nothing
 end
 # Define a serialization format for the symbolic equations:
 mutable struct Node
     #Holds operators, variables, constants in a tree
@@ -222,9 +227,7 @@ function MSE(x::Array{Float32}, y::Array{Float32})::Float32
 end
 # Score an equation
-function scoreFunc(
-        tree::Node;
-        parsimony::Float32=0.1f0)::Float32
     try
         return MSE(evalTreeArray(tree), y) + countNodes(tree)*parsimony
     catch error
@@ -323,8 +326,7 @@ end
 #  exp(-delta/T) defines probability of accepting a change
 function iterate(
         tree::Node, T::Float32,
-        alpha::Float32=1.0f0,
-        mult::Float32=0.1f0;
         annealing::Bool=true
     )::Node
     prev = tree
@@ -353,8 +355,8 @@ function iterate(
     end
     if annealing
-        beforeLoss = scoreFunc(prev, parsimony=mult)
-        afterLoss = scoreFunc(tree, parsimony=mult)
         delta = afterLoss - beforeLoss
         probChange = exp(-delta/(T*alpha))
@@ -382,8 +384,8 @@ mutable struct PopMember
     score::Float32
     birth::Int32
-    PopMember(t::Node) = new(t, scoreFunc(t, parsimony=parsimony), round(Int32, 1e3*(time()-1.6e9)))
-    PopMember(t::Node, score::Float32) = new(t, score, round(Int32, 1e3*(time()-1.6e9)))
 end
@@ -413,9 +415,9 @@ function bestOfSample(pop::Population)::PopMember
 end
 # Return best 10 examples
-function bestSubPop(pop::Population)::Population
     best_idx = sortperm([pop.members[member].score for member=1:pop.n])
-    return Population(pop.members[best_idx[1:10]])
 end
 # Mutate the best sampled member of the population
@@ -425,10 +427,10 @@ function iterateSample(
     allstar = bestOfSample(pop)
     new = iterate(
         allstar.tree, T,
-        alpha, parsimony, annealing=annealing)
     allstar.tree = new
-    allstar.score = scoreFunc(new, parsimony=parsimony)
-    allstar.birth = round(Int32, 1e3*(time()-1.6e9))
     return allstar
 end
@@ -503,9 +505,33 @@ function setConstants(tree::Node, constants::Array{Float32, 1})
     end
 end
-# Does nothing currently
 # Placeholder version: reads the tree's constants into `x0` but never uses
 # them, and hands `member` back unchanged.
 function optimizeConstants(member::PopMember)::PopMember
     x0 = getConstants(member.tree)
     return member
 end

 include("hyperparams.jl")
 include("dataset.jl")
+import Optim
 const maxdegree = 2
 const actualMaxsize = maxsize + maxdegree
     verbosity > 0 ? println(string...) : nothing
 end
# Wall-clock time (seconds) captured once, when this file is loaded.
# Birth stamps are measured relative to this instant.
const birthClockStart = time()

# Return a birth stamp for a population member: milliseconds elapsed since
# this file was loaded, as an Int32.
#
# Measuring from a fixed calendar offset (1.6e9 s) overflows Int32 roughly
# 24.8 days after that instant, at which point `round(Int32, ...)` throws
# InexactError. Counting from load time keeps stamps small and increasing
# within a run.
function giveBirth()::Int32
    elapsedMs = 1e3 * (time() - birthClockStart)
    # Saturate instead of throwing if a single run outlives the Int32 range
    # (or the system clock is stepped backwards).
    return round(Int32, clamp(elapsedMs, 0.0, Float64(typemax(Int32))))
end
 # Define a serialization format for the symbolic equations:
 mutable struct Node
     #Holds operators, variables, constants in a tree
 end
 # Score an equation
+function scoreFunc(tree::Node)::Float32
     try
         return MSE(evalTreeArray(tree), y) + countNodes(tree)*parsimony
     catch error
 #  exp(-delta/T) defines probability of accepting a change
 function iterate(
         tree::Node, T::Float32,
+        alpha::Float32=1.0f0;
         annealing::Bool=true
     )::Node
     prev = tree
     end
     if annealing
+        beforeLoss = scoreFunc(prev)
+        afterLoss = scoreFunc(tree)
         delta = afterLoss - beforeLoss
         probChange = exp(-delta/(T*alpha))
     score::Float32
     birth::Int32
+    PopMember(t::Node) = new(t, scoreFunc(t), giveBirth())
+    PopMember(t::Node, score::Float32) = new(t, score, giveBirth())
 end
 end
 # Return best 10 examples
# Select the `topn` lowest-score members of `pop` and return them wrapped in
# a new Population.
#   pop  - population to rank; members 1..pop.n are considered
#   topn - number of members to keep (default 10). Accepts any Integer: an
#          `Int32` annotation would reject the literal default `10` on 64-bit
#          Julia, because keyword annotations assert the type rather than
#          convert. The value is clamped to 0..pop.n so that a population
#          smaller than `topn` yields all of its members instead of a
#          BoundsError.
function bestSubPop(pop::Population; topn::Integer=10)::Population
    ranking = sortperm([pop.members[member].score for member=1:pop.n])
    nkeep = clamp(topn, 0, pop.n)
    return Population(pop.members[ranking[1:nkeep]])
end
 # Mutate the best sampled member of the population
     allstar = bestOfSample(pop)
     new = iterate(
         allstar.tree, T,
+        alpha, annealing=annealing)
     allstar.tree = new
+    allstar.score = scoreFunc(new)
+    allstar.birth = giveBirth()
     return allstar
 end
     end
 end
# Objective handed to the optimizer: write the candidate constants `x` into
# `tree` through `setConstants`, then score the modified tree.
function optFunc(x::Array{Float32, 1}, tree::Node)::Float32
    setConstants(tree, x)
    score = scoreFunc(tree)
    return score
end
# Optimize the constants of `member.tree` in place using Optim:
# Newton's method when the tree has exactly one constant, LBFGS otherwise.
#
# If the optimizer reports convergence and the optimized score is no worse
# than the starting score, the tree keeps the optimized constants and
# `member.score` / `member.birth` are refreshed. In every other case the
# original constants are written back and the member is left as it was.
# Returns the same `member` object that was passed in.
function optimizeConstants(member::PopMember)::PopMember
    nconst = countConstants(member.tree)
    if nconst == 0
        return member
    end
    x0 = getConstants(member.tree)
    f(x::Array{Float32,1})::Float32 = optFunc(x, member.tree)
    # Score at the starting point; used to reject a "converged" result that
    # is actually worse (or NaN, since any comparison with NaN is false).
    baseline = f(x0)
    if length(x0) == 1
        result = Optim.optimize(f, x0, Optim.Newton(), Optim.Options(iterations=20))
    else
        result = Optim.optimize(f, x0, Optim.LBFGS(), Optim.Options(iterations=10))
    end
    best = Optim.minimum(result)
    if Optim.converged(result) && best <= baseline
        # `f` leaves the tree at the last point evaluated, which need not be
        # the minimizer, so set the constants explicitly.
        setConstants(member.tree, Optim.minimizer(result))
        member.score = convert(Float32, best)
        member.birth = giveBirth()
    else
        setConstants(member.tree, x0)
    end
    return member
end

paralleleureqa.jl CHANGED Viewed

@@ -1,4 +1,5 @@
 include("eureqa.jl")
 const nthreads = Threads.nthreads()
@@ -6,10 +7,9 @@ const nthreads = Threads.nthreads()
 mutable struct HallOfFame
     members::Array{PopMember, 1}
     exists::Array{Bool, 1} #Whether it has been set
-    optimized::Array{Bool, 1} #Whether the constants have been optimized
     # Arranged by complexity - store one at each.
-    HallOfFame() = new([PopMember(Node(1f0), 1f9) for i=1:actualMaxsize], [false for i=1:actualMaxsize], [false for i=1:actualMaxsize])
 end
@@ -19,11 +19,13 @@ function fullRun(niterations::Integer;
                 ncyclesperiteration::Integer=3000,
                 fractionReplaced::Float32=0.1f0,
                 verbosity::Integer=0,
                )
     debug(verbosity, "Lets try to learn (x2^2 + cos(x3)) using regularized evolution from scratch")
     debug(verbosity, "Running with $nthreads threads")
     # Generate random initial populations
     allPops = [Population(npop, 3) for j=1:nthreads]
     # Repeat this many evolutions; we collect and migrate the best
     # each time.
     hallOfFame = HallOfFame()
@@ -32,16 +34,22 @@ function fullRun(niterations::Integer;
         # Spawn threads to run independent evolutions, then gather them
         @inbounds Threads.@threads for i=1:nthreads
             allPops[i] = run(allPops[i], ncyclesperiteration, annealing, verbosity=verbosity)
         end
         # Get best 10 models from each evolution. Copy because we re-assign later.
-        bestPops = deepcopy(Population([member for pop in allPops for member in bestSubPop(pop).members]))
         #Update hall of fame
         for member in bestPops.members
             size = countNodes(member.tree)
             if member.score < hallOfFame.members[size].score
                 hallOfFame.members[size] = deepcopy(member)
                 hallOfFame.exists[size] = true
             end
         end
@@ -57,7 +65,6 @@ function fullRun(niterations::Integer;
                 betterThanAllSmaller = (numberSmallerAndBetter == 0)
                 if betterThanAllSmaller
                     debug(verbosity, "$size \t $(member.score-parsimony*size) \t $(stringTree(member.tree))")
-                    member = optimizeConstants(member)
                     push!(dominating, member)
                 end
             end
@@ -79,7 +86,7 @@ function fullRun(niterations::Integer;
             for j=1:nthreads
                 for k in rand(1:npop, Integer(npop*fractionReplacedHof))
                     # Copy in case one gets used twice
-                    allPops[j].members[k] = deepcopy(dominating[rand(1:size(dominating)[1])])
                 end
             end
         end

 include("eureqa.jl")
+import Optim
 const nthreads = Threads.nthreads()
 # Best member recorded so far for each equation size.
 mutable struct HallOfFame
     # Slot `k` is used for trees whose node count is `k`.
     members::Vector{PopMember}
     # exists[k] becomes true once slot `k` has been set.
     exists::Vector{Bool}
     # Fill all `actualMaxsize` slots with a placeholder member built from
     # `Node(1f0)` with score 1f9, and mark every slot as not yet set.
     function HallOfFame()
         placeholders = [PopMember(Node(1f0), 1f9) for _ in 1:actualMaxsize]
         return new(placeholders, fill(false, actualMaxsize))
     end
 end
                 ncyclesperiteration::Integer=3000,
                 fractionReplaced::Float32=0.1f0,
                 verbosity::Integer=0,
+                topn::Int32=10
                )
     debug(verbosity, "Lets try to learn (x2^2 + cos(x3)) using regularized evolution from scratch")
     debug(verbosity, "Running with $nthreads threads")
     # Generate random initial populations
     allPops = [Population(npop, 3) for j=1:nthreads]
+    bestSubPops = [Population(1) for j=1:nthreads]
     # Repeat this many evolutions; we collect and migrate the best
     # each time.
     hallOfFame = HallOfFame()
         # Spawn threads to run independent evolutions, then gather them
         @inbounds Threads.@threads for i=1:nthreads
             allPops[i] = run(allPops[i], ncyclesperiteration, annealing, verbosity=verbosity)
+            bestSubPops[i] = bestSubPop(allPops[i], topn=topn)
+            for j=1:bestSubPops[i].n
+                bestSubPops[i].members[j] = optimizeConstants(bestSubPops[i].members[j])
+            end
         end
         # Get best `topn` models from each evolution. Copy because we re-assign later.
+        # bestPops = deepcopy(Population([member for pop in allPops for member in bestSubPop(pop).members]))
+        bestPops = deepcopy(Population([member for pop in bestSubPops for member in pop.members]))
         #Update hall of fame
         for member in bestPops.members
             size = countNodes(member.tree)
             if member.score < hallOfFame.members[size].score
                 hallOfFame.members[size] = deepcopy(member)
+                #hallOfFame.members[size] = optimizeConstants(hallOfFame.members[size])
                 hallOfFame.exists[size] = true
             end
         end
                 betterThanAllSmaller = (numberSmallerAndBetter == 0)
                 if betterThanAllSmaller
                     debug(verbosity, "$size \t $(member.score-parsimony*size) \t $(stringTree(member.tree))")
                     push!(dominating, member)
                 end
             end
             for j=1:nthreads
                 for k in rand(1:npop, Integer(npop*fractionReplacedHof))
                     # Copy in case one gets used twice
+                    allPops[j].members[k] = deepcopy(dominating[rand(2:size(dominating)[1])])
                 end
             end
         end