MilesCranmer committed on
Commit
683071f
·
1 Parent(s): 319103f

Add optional threaded sub-samples

Browse files
Files changed (2) hide show
  1. julia/sr.jl +38 -7
  2. pysr/sr.py +7 -0
julia/sr.jl CHANGED
@@ -1,5 +1,6 @@
1
  import Optim
2
  import Printf: @printf
 
3
 
4
  const maxdegree = 2
5
  const actualMaxsize = maxsize + maxdegree
@@ -625,19 +626,49 @@ end
625
 
626
  # Mutate the best sampled member of the population
627
  function iterateSample(pop::Population, T::Float32)::PopMember
628
- allstar = bestOfSample(pop)
629
- return iterate(allstar, T)
630
  end
631
 
632
  # Pass through the population several times, replacing the oldest
633
  # with the fittest of a small subsample
634
  function regEvolCycle(pop::Population, T::Float32)::Population
635
- for i=1:round(Integer, pop.n/ns)
636
- baby = iterateSample(pop, T)
637
- #printTree(baby.tree)
638
- oldest = argmin([pop.members[member].birth for member=1:pop.n])
639
- pop.members[oldest] = baby
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
640
  end
 
641
  return pop
642
  end
643
 
 
1
  import Optim
2
  import Printf: @printf
3
+ import Random: shuffle!
4
 
5
  const maxdegree = 2
6
  const actualMaxsize = maxsize + maxdegree
 
626
 
627
  # Mutate the best sampled member of the population
628
  function iterateSample(pop::Population, T::Float32)::PopMember
 
 
629
  end
630
 
631
  # Pass through the population several times, replacing the oldest
632
  # with the fittest of a small subsample
633
  function regEvolCycle(pop::Population, T::Float32)::Population
634
+ # Batch over each subsample. This can give a 15% improvement in speed (probably more so
635
+ # for large populations), but it is ultimately a different algorithm than regularized
636
+ # evolution and might not be as good.
637
+ if fast_cycle:
638
+ shuffle!(pop.members)
639
+ n_evol_cycles = round(Integer, pop.n/ns)
640
+ babies = Array{PopMember}(undef, n_evol_cycles)
641
+
642
+ # Iterate each ns-member sub-sample
643
+ @inbounds Threads.@threads for i=1:n_evol_cycles
644
+ best_score = Inf32
645
+ best_idx = 1+(i-1)*ns
646
+ # Calculate best member of the subsample:
647
+ for sub_i=1+(i-1)*ns:i*ns
648
+ if pop.members[sub_i].score < best_score
649
+ best_score = pop.members[sub_i].score
650
+ best_idx = sub_i
651
+ end
652
+ end
653
+ allstar = pop.members[best_idx]
654
+ babies[i] = iterate(allstar, T)
655
+ end
656
+
657
+ # Replace the n_evol_cycles oldest members of the population with the new babies
658
+ @inbounds for i=1:n_evol_cycles
659
+ oldest = argmin([pop.members[member].birth for member=1:pop.n])
660
+ pop.members[oldest] = babies[i]
661
+ end
662
+ else
663
+ for i=1:round(Integer, pop.n/ns)
664
+ allstar = bestOfSample(pop)
665
+ baby = iterate(allstar, T)
666
+ #printTree(baby.tree)
667
+ oldest = argmin([pop.members[member].birth for member=1:pop.n])
668
+ pop.members[oldest] = baby
669
+ end
670
  end
671
+
672
  return pop
673
  end
674
 
pysr/sr.py CHANGED
@@ -73,6 +73,7 @@ def pysr(X=None, y=None, weights=None,
73
  test='simple1',
74
  verbosity=1e9,
75
  maxsize=20,
 
76
  maxdepth=None,
77
  threads=None, #deprecated
78
  julia_optimization=3,
@@ -129,6 +130,11 @@ def pysr(X=None, y=None, weights=None,
129
  :param equation_file: str, Where to save the files (.csv separated by |)
130
  :param test: str, What test to run, if X,y not passed.
131
  :param maxsize: int, Max size of an equation.
 
 
 
 
 
132
  :param julia_optimization: int, Optimization level (0, 1, 2, 3)
133
  :returns: pd.DataFrame, Results dataframe, giving complexity, MSE, and equations
134
  (as strings).
@@ -204,6 +210,7 @@ const parsimony = {parsimony:f}f0
204
  const alpha = {alpha:f}f0
205
  const maxsize = {maxsize:d}
206
  const maxdepth = {maxdepth:d}
 
207
  const migration = {'true' if migration else 'false'}
208
  const hofMigration = {'true' if hofMigration else 'false'}
209
  const fractionReplacedHof = {fractionReplacedHof}f0
 
73
  test='simple1',
74
  verbosity=1e9,
75
  maxsize=20,
76
+ fast_cycle=False,
77
  maxdepth=None,
78
  threads=None, #deprecated
79
  julia_optimization=3,
 
130
  :param equation_file: str, Where to save the files (.csv separated by |)
131
  :param test: str, What test to run, if X,y not passed.
132
  :param maxsize: int, Max size of an equation.
133
+ :param maxdepth: int, Max depth of an equation. You can use both maxsize and maxdepth.
134
+ By default, maxdepth is set equal to maxsize, which makes it redundant.
135
+ :param fast_cycle: bool, (experimental) - batch over population subsamples. This
136
+ is a slightly different algorithm than regularized evolution, but does cycles
137
+ 15% faster. May be algorithmically less efficient.
138
  :param julia_optimization: int, Optimization level (0, 1, 2, 3)
139
  :returns: pd.DataFrame, Results dataframe, giving complexity, MSE, and equations
140
  (as strings).
 
210
  const alpha = {alpha:f}f0
211
  const maxsize = {maxsize:d}
212
  const maxdepth = {maxdepth:d}
213
+ const fast_cycle = {'true' if fast_cycle else 'false'}
214
  const migration = {'true' if migration else 'false'}
215
  const hofMigration = {'true' if hofMigration else 'false'}
216
  const fractionReplacedHof = {fractionReplacedHof}f0