MilesCranmer committed
Commit cb0e2e9
Parent: 6d4b486

Speed up by not recalculating score

Files changed (2)
  1. README.md +4 -4
  2. julia/sr.jl +26 -30
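In short: `iterate` in julia/sr.jl now takes a `PopMember` rather than a bare tree and reads the pre-mutation loss from the member's cached `score` field, so `scoreFunc` no longer has to be re-evaluated on the unmutated tree every mutation cycle. A minimal sketch of that caching pattern (placeholder names; `Member`, `mutate`, and `step` below are illustrative, not code from this repo):

```julia
# Sketch only: Member, mutate, and step are stand-ins, not sr.jl code.
mutable struct Member
    tree::Vector{Float64}   # stand-in for the expression tree
    score::Float64          # loss, computed once and cached
end

scoreFunc(tree) = sum(abs2, tree)                   # placeholder loss
mutate(tree) = tree .+ 0.1 .* randn(length(tree))   # placeholder mutation

function step(member::Member, T::Float64)
    beforeLoss = member.score                # reused, not recomputed
    tree = mutate(copy(member.tree))
    afterLoss = scoreFunc(tree)
    # Metropolis-style acceptance, as in the annealing branch of iterate
    accept = !isnan(afterLoss) && rand() < exp(-(afterLoss - beforeLoss) / T)
    return accept ? Member(tree, afterLoss) : member
end

t = randn(5)
m = Member(t, scoreFunc(t))   # score is computed once, at construction
m = step(m, 0.5)
```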
README.md CHANGED
@@ -274,6 +274,8 @@ pd.DataFrame, Results dataframe, giving complexity, MSE, and equations
274
  - [x] Put on PyPI
275
  - [x] Treat baseline as a solution.
276
  - [x] Print score alongside MSE: \delta \log(MSE)/\delta \log(complexity)
277
  - [ ] Add true multi-node processing, with MPI, or just file sharing. Multiple populations per core.
278
  - Ongoing in cluster branch
279
  - [ ] Consider allowing multi-threading to be turned off, for faster testing (cache issue on Travis). Or could simply fix the caching issue there.
@@ -296,10 +298,8 @@ pd.DataFrame, Results dataframe, giving complexity, MSE, and equations
296
  - [ ] Add GPU capability?
297
  - Not sure if possible, as binary trees are the real bottleneck.
298
  - [ ] Performance:
299
- - [ ] Use an enum for functions instead of storing them?
300
- - Current most expensive operations:
301
- - [ ] Calculating the loss function - there are duplicate calculations happening.
302
- - [x] Declaration of the weights array every iteration
303
  - [ ] Idea: use gradient of equation with respect to each operator (perhaps simply add to each operator) to tell which part is the most "sensitive" to changes. Then, perhaps insert/delete/mutate on that part of the tree?
304
  - [ ] For hierarchical idea: after running some number of iterations, do a search for "most common pattern". Then, turn that subtree into its own operator.
305
  - [ ] Additional degree operators?
 
274
  - [x] Put on PyPI
275
  - [x] Treat baseline as a solution.
276
  - [x] Print score alongside MSE: \delta \log(MSE)/\delta \log(complexity)
277
+ - [x] Calculating the loss function - there are duplicate calculations happening.
278
+ - [x] Declaration of the weights array every iteration
279
  - [ ] Add true multi-node processing, with MPI, or just file sharing. Multiple populations per core.
280
  - Ongoing in cluster branch
281
  - [ ] Consider allowing multi-threading to be turned off, for faster testing (cache issue on Travis). Or could simply fix the caching issue there.
 
298
  - [ ] Add GPU capability?
299
  - Not sure if possible, as binary trees are the real bottleneck.
300
  - [ ] Performance:
301
+ - Use an enum for functions instead of storing them?
302
+ - Threaded recursion?
303
  - [ ] Idea: use gradient of equation with respect to each operator (perhaps simply add to each operator) to tell which part is the most "sensitive" to changes. Then, perhaps insert/delete/mutate on that part of the tree?
304
  - [ ] For hierarchical idea: after running some number of iterations, do a search for "most common pattern". Then, turn that subtree into its own operator.
305
  - [ ] Additional degree operators?
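One shape the new "Use an enum for functions instead of storing them?" item could take (a hypothetical sketch; none of these names exist in sr.jl): nodes would store a small integer operator code rather than a function object, and evaluation would index into a fixed tuple of operators.

```julia
# Hypothetical sketch of the enum idea; not code from this repository.
@enum BinOp ADD=1 MUL=2 SUB=3

const BINOPS = (+, *, -)      # indexed by Int(op)

applyop(op::BinOp, x, y) = BINOPS[Int(op)](x, y)

applyop(MUL, 3.0f0, 4.0f0)    # 12.0f0
```

A real version would probably branch on the code (or use a generated function) for type stability; the sketch only illustrates the storage idea.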
julia/sr.jl CHANGED
@@ -445,7 +445,7 @@ function deleteRandomOp(tree::Node)::Node
445
  return tree
446
  end
447
 
448
- # Simplify tree
449
  function combineOperators(tree::Node)::Node
450
  # (const (+*) const) already accounted for
451
  # ((const + var) + const) => (const + var)
@@ -478,7 +478,7 @@ function combineOperators(tree::Node)::Node
478
  return tree
479
  end
480
 
481
- # Simplify tree
482
  function simplifyTree(tree::Node)::Node
483
  if tree.degree == 1
484
  tree.l = simplifyTree(tree.l)
@@ -499,11 +499,23 @@ function simplifyTree(tree::Node)::Node
499
  return tree
500
  end
501
 
502
  # Go through one simulated annealing mutation cycle
503
  # exp(-delta/T) defines probability of accepting a change
504
- function iterate(tree::Node, T::Float32)::Node
505
- prev = tree
506
- tree = copyNode(tree)
 
507
 
508
  mutationChoice = rand()
509
  weightAdjustmentMutateConstant = min(8, countConstants(tree))/8.0
@@ -526,25 +538,25 @@ function iterate(tree::Node, T::Float32)::Node
526
  elseif mutationChoice < cweights[6]
527
  tree = simplifyTree(tree) # Sometimes we simplify tree
528
  tree = combineOperators(tree) # See if repeated constants at outer levels
529
- return tree
530
  elseif mutationChoice < cweights[7]
531
  tree = genRandomTree(5) # Sometimes we generate a fresh random tree
532
  else
533
- return tree
534
  end
535
 
536
  if annealing
537
- beforeLoss = scoreFunc(prev)
538
- afterLoss = scoreFunc(tree)
539
  delta = afterLoss - beforeLoss
540
  probChange = exp(-delta/(T*alpha))
541
 
542
- if isnan(afterLoss) || probChange < rand()
543
- return copyNode(prev)
 
544
  end
545
  end
546
-
547
- return tree
548
  end
549
 
550
  # Create a random equation by appending random operators
@@ -557,17 +569,6 @@ function genRandomTree(length::Integer)::Node
557
  end
558
 
559
 
560
- # Define a member of population by equation, score, and age
561
- mutable struct PopMember
562
- tree::Node
563
- score::Float32
564
- birth::Int32
565
-
566
- PopMember(t::Node) = new(t, scoreFunc(t), getTime())
567
- PopMember(t::Node, score::Float32) = new(t, score, getTime())
568
-
569
- end
570
-
571
  # A list of members of the population, with easy constructors,
572
  # which allow for random generation of new populations
573
  mutable struct Population
@@ -602,11 +603,7 @@ end
602
  # Mutate the best sampled member of the population
603
  function iterateSample(pop::Population, T::Float32)::PopMember
604
  allstar = bestOfSample(pop)
605
- new = iterate(allstar.tree, T)
606
- allstar.tree = new
607
- allstar.score = scoreFunc(new)
608
- allstar.birth = getTime()
609
- return allstar
610
  end
611
 
612
  # Pass through the population several times, replacing the oldest
@@ -905,4 +902,3 @@ function fullRun(niterations::Integer;
905
  end
906
  end
907
  end
908
-
 
445
  return tree
446
  end
447
 
448
+ # Simplify tree
449
  function combineOperators(tree::Node)::Node
450
  # (const (+*) const) already accounted for
451
  # ((const + var) + const) => (const + var)
 
478
  return tree
479
  end
480
 
481
+ # Simplify tree
482
  function simplifyTree(tree::Node)::Node
483
  if tree.degree == 1
484
  tree.l = simplifyTree(tree.l)
 
499
  return tree
500
  end
501
 
502
+ # Define a member of population by equation, score, and age
503
+ mutable struct PopMember
504
+ tree::Node
505
+ score::Float32
506
+ birth::Int32
507
+
508
+ PopMember(t::Node) = new(t, scoreFunc(t), getTime())
509
+ PopMember(t::Node, score::Float32) = new(t, score, getTime())
510
+
511
+ end
512
+
513
  # Go through one simulated annealing mutation cycle
514
  # exp(-delta/T) defines probability of accepting a change
515
+ function iterate(member::PopMember, T::Float32)::PopMember
516
+ prev = member.tree
517
+ tree = copyNode(prev)
518
+ beforeLoss = member.score
519
 
520
  mutationChoice = rand()
521
  weightAdjustmentMutateConstant = min(8, countConstants(tree))/8.0
 
538
  elseif mutationChoice < cweights[6]
539
  tree = simplifyTree(tree) # Sometimes we simplify tree
540
  tree = combineOperators(tree) # See if repeated constants at outer levels
541
+ return PopMember(tree, beforeLoss)
542
  elseif mutationChoice < cweights[7]
543
  tree = genRandomTree(5) # Sometimes we generate a fresh random tree
544
  else
545
+ return PopMember(tree, beforeLoss)
546
  end
547
 
548
+ afterLoss = scoreFunc(tree)
549
+
550
  if annealing
551
  delta = afterLoss - beforeLoss
552
  probChange = exp(-delta/(T*alpha))
553
 
554
+ return_unaltered = (isnan(afterLoss) || probChange < rand())
555
+ if return_unaltered
556
+ return PopMember(copyNode(prev), beforeLoss)
557
  end
558
  end
559
+ return PopMember(tree, afterLoss)
 
560
  end
561
 
562
  # Create a random equation by appending random operators
 
569
  end
570
 
571
 
572
  # A list of members of the population, with easy constructors,
573
  # which allow for random generation of new populations
574
  mutable struct Population
 
603
  # Mutate the best sampled member of the population
604
  function iterateSample(pop::Population, T::Float32)::PopMember
605
  allstar = bestOfSample(pop)
606
+ return iterate(allstar, T)
607
  end
608
 
609
  # Pass through the population several times, replacing the oldest
 
902
  end
903
  end
904
  end
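For reference, the acceptance test kept in the annealing branch above is the rule described by the comment `exp(-delta/T) defines probability of accepting a change`: a mutation that worsens the loss by `delta` is still accepted with probability `exp(-delta/(T*alpha))`, and improvements are always accepted. Restated as a standalone sketch (here `T` and `alpha` are plain arguments; in sr.jl, `T` is passed in and `alpha` comes from the surrounding scope):

```julia
# Standalone restatement of the acceptance test used in iterate (sketch).
function accept_mutation(beforeLoss::Float32, afterLoss::Float32,
                         T::Float32, alpha::Float32)::Bool
    isnan(afterLoss) && return false        # never keep an invalid tree
    delta = afterLoss - beforeLoss
    probChange = exp(-delta / (T * alpha))  # >= 1 whenever the loss improved
    return probChange >= rand()             # so improvements are always accepted
end
```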