MilesCranmer committed on
Commit
ecc6ae8
·
1 Parent(s): d85f644

Can call distributed processing from python

Browse files
Files changed (3) hide show
  1. julia/loop.jl +0 -117
  2. julia/sr.jl +106 -0
  3. pysr/sr.py +16 -8
julia/loop.jl DELETED
@@ -1,117 +0,0 @@
1
- using Distributed
2
- const nprocs = 4
3
- addprocs(4)
4
- @everywhere include(".dataset_28330894764081783777.jl")
5
- @everywhere include(".hyperparams_28330894764081783777.jl")
6
- @everywhere include("sr.jl")
7
-
8
-
9
- # 1. Start a population on every process
10
- allPops = Future[]
11
- bestSubPops = [Population(1) for j=1:nprocs]
12
- hallOfFame = HallOfFame()
13
-
14
- for i=1:nprocs
15
- npop=300
16
- future = @spawnat :any Population(npop, 3)
17
- push!(allPops, future)
18
- end
19
-
20
- npop=300
21
- ncyclesperiteration=3000
22
- fractionReplaced=0.1f0
23
- verbosity=convert(Int, 1e9)
24
- topn=10
25
- niterations=10
26
-
27
-
28
- # # 2. Start the cycle on every process:
29
- for i=1:nprocs
30
- allPops[i] = @spawnat :any run(fetch(allPops[i]), ncyclesperiteration, verbosity=verbosity)
31
- end
32
- println("Started!")
33
- cycles_complete = nprocs * 10
34
- while cycles_complete > 0
35
- for i=1:nprocs
36
- if isready(allPops[i])
37
- cur_pop = fetch(allPops[i])
38
- bestSubPops[i] = bestSubPop(cur_pop, topn=topn)
39
-
40
- #Try normal copy...
41
- bestPops = Population([member for pop in bestSubPops for member in pop.members])
42
-
43
- for member in cur_pop.members
44
- size = countNodes(member.tree)
45
- if member.score < hallOfFame.members[size].score
46
- hallOfFame.members[size] = deepcopy(member)
47
- hallOfFame.exists[size] = true
48
- end
49
- end
50
-
51
- # Dominating pareto curve - must be better than all simpler equations
52
- dominating = PopMember[]
53
- open(hofFile, "w") do io
54
- debug(verbosity, "\n")
55
- debug(verbosity, "Hall of Fame:")
56
- debug(verbosity, "-----------------------------------------")
57
- debug(verbosity, "Complexity \t MSE \t Equation")
58
- println(io,"Complexity|MSE|Equation")
59
- for size=1:actualMaxsize
60
- if hallOfFame.exists[size]
61
- member = hallOfFame.members[size]
62
- curMSE = MSE(evalTreeArray(member.tree), y)
63
- numberSmallerAndBetter = sum([curMSE > MSE(evalTreeArray(hallOfFame.members[i].tree), y) for i=1:(size-1)])
64
- betterThanAllSmaller = (numberSmallerAndBetter == 0)
65
- if betterThanAllSmaller
66
- debug(verbosity, "$size \t $(curMSE) \t $(stringTree(member.tree))")
67
- println(io, "$size|$(curMSE)|$(stringTree(member.tree))")
68
- push!(dominating, member)
69
- end
70
- end
71
- end
72
- debug(verbosity, "")
73
- end
74
-
75
- # Try normal copy otherwise.
76
- if migration
77
- for k in rand(1:npop, round(Integer, npop*fractionReplaced))
78
- to_copy = rand(1:size(bestPops.members)[1])
79
- cur_pop.members[k] = PopMember(
80
- copyNode(bestPops.members[to_copy].tree),
81
- bestPops.members[to_copy].score)
82
- end
83
- end
84
-
85
- if hofMigration && size(dominating)[1] > 0
86
- for k in rand(1:npop, round(Integer, npop*fractionReplacedHof))
87
- # Copy in case one gets used twice
88
- to_copy = rand(1:size(dominating)[1])
89
- cur_pop.members[k] = PopMember(
90
- copyNode(dominating[to_copy].tree)
91
- )
92
- end
93
- end
94
-
95
- allPops[i] = @spawnat :any let
96
- tmp_pop = run(cur_pop, ncyclesperiteration, verbosity=verbosity)
97
- for j=1:tmp_pop.n
98
- if rand() < 0.1
99
- tmp_pop.members[j].tree = simplifyTree(tmp_pop.members[j].tree)
100
- tmp_pop.members[j].tree = combineOperators(tmp_pop.members[j].tree)
101
- if shouldOptimizeConstants
102
- tmp_pop.members[j] = optimizeConstants(tmp_pop.members[j])
103
- end
104
- end
105
- end
106
- tmp_pop
107
- end
108
-
109
- global cycles_complete -= 1
110
- end
111
- end
112
- sleep(1e-3)
113
- end
114
-
115
- rmprocs(nprocs)
116
-
117
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
julia/sr.jl CHANGED
@@ -738,3 +738,109 @@ mutable struct HallOfFame
738
  end
739
 
740
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
738
  end
739
 
740
 
741
"""
    fullRun(niterations; npop=300, ncyclesperiteration=3000,
            fractionReplaced=0.1f0, verbosity=0, topn=10)

Drive the distributed search loop: create `nprocs` populations, evolve each one
with `run` on whichever worker `@spawnat :any` picks, and every time a population
finishes a batch of cycles, update the hall of fame, rewrite `hofFile`, migrate
members into the population, and resubmit it.

# Arguments
- `niterations`: number of evolve/migrate rounds budgeted per population. The loop
  stops after `nprocs * niterations` finished rounds in total.

# Keywords
- `npop`: number of members in each population.
- `ncyclesperiteration`: cycle count passed to `run` for each round.
- `fractionReplaced`: fraction of `npop` overwritten with members drawn from the
  best sub-populations when `migration` is true.
- `verbosity`: forwarded to `run` and `debug`.
- `topn`: forwarded to `bestSubPop` when extracting each population's best members.

Reads the globals `nprocs`, `hofFile`, `actualMaxsize`, `y`, `migration`,
`hofMigration`, `fractionReplacedHof` and `shouldOptimizeConstants`, which must be
defined before this function is called.

Returns `nothing`; results are written to `hofFile`.
"""
function fullRun(niterations::Integer;
                 npop::Integer=300,
                 ncyclesperiteration::Integer=3000,
                 fractionReplaced::Real=0.1f0,
                 verbosity::Integer=0,
                 topn::Integer=10
                )
    # 1. Start a population on every process
    allPops = Future[]
    bestSubPops = [Population(1) for j in 1:nprocs]
    hallOfFame = HallOfFame()

    for i in 1:nprocs
        # Use the caller's `npop`; it was previously overwritten with 300 here.
        # NOTE(review): the meaning of the second argument (3) is defined by the
        # Population constructor, which is outside this block.
        future = @spawnat :any Population(npop, 3)
        push!(allPops, future)
    end

    # 2. Start the cycle on every process:
    for i in 1:nprocs
        allPops[i] = @spawnat :any run(fetch(allPops[i]), ncyclesperiteration, verbosity=verbosity)
    end
    println("Started!")

    # Total number of finished rounds to process across all populations.
    # This was hard-coded to `nprocs * 10`, which ignored `niterations`.
    cycles_complete = nprocs * niterations
    while cycles_complete > 0
        for i in 1:nprocs
            # Only handle populations whose round has finished; never block here.
            isready(allPops[i]) || continue

            cur_pop = fetch(allPops[i])
            bestSubPops[i] = bestSubPop(cur_pop, topn=topn)

            # Pool the best members of every population as the migration source.
            bestPops = Population([member for pop in bestSubPops for member in pop.members])

            # Keep the lowest-scoring member seen so far at each complexity.
            for member in cur_pop.members
                complexity = countNodes(member.tree)
                if member.score < hallOfFame.members[complexity].score
                    hallOfFame.members[complexity] = deepcopy(member)
                    hallOfFame.exists[complexity] = true
                end
            end

            # Dominating pareto curve - must be better than all simpler equations
            dominating = PopMember[]
            open(hofFile, "w") do io
                debug(verbosity, "\n")
                debug(verbosity, "Hall of Fame:")
                debug(verbosity, "-----------------------------------------")
                debug(verbosity, "Complexity \t MSE \t Equation")
                println(io,"Complexity|MSE|Equation")
                for complexity in 1:actualMaxsize
                    hallOfFame.exists[complexity] || continue
                    member = hallOfFame.members[complexity]
                    curMSE = MSE(evalTreeArray(member.tree), y)
                    # `any` over a generator is well-defined (false) for the empty
                    # range at complexity 1, unlike `sum` over an empty array whose
                    # element type depends on inference.
                    beatenBySimpler = any(
                        curMSE > MSE(evalTreeArray(hallOfFame.members[i].tree), y)
                        for i in 1:(complexity - 1)
                    )
                    if !beatenBySimpler
                        debug(verbosity, "$complexity \t $(curMSE) \t $(stringTree(member.tree))")
                        println(io, "$complexity|$(curMSE)|$(stringTree(member.tree))")
                        push!(dominating, member)
                    end
                end
                debug(verbosity, "")
            end

            # Overwrite a random subset of this population (indices drawn with
            # replacement) with copies of members from the pooled best set.
            if migration
                for k in rand(1:npop, round(Integer, npop*fractionReplaced))
                    to_copy = rand(1:size(bestPops.members, 1))
                    cur_pop.members[k] = PopMember(
                        copyNode(bestPops.members[to_copy].tree),
                        bestPops.members[to_copy].score)
                end
            end

            if hofMigration && !isempty(dominating)
                for k in rand(1:npop, round(Integer, npop*fractionReplacedHof))
                    # Copy in case one gets used twice
                    to_copy = rand(1:length(dominating))
                    cur_pop.members[k] = PopMember(
                        copyNode(dominating[to_copy].tree)
                    )
                end
            end

            # Resubmit the population: evolve it, then simplify (and optionally
            # constant-optimize) roughly 10% of its members before returning it.
            allPops[i] = @spawnat :any let
                tmp_pop = run(cur_pop, ncyclesperiteration, verbosity=verbosity)
                for j in 1:tmp_pop.n
                    if rand() < 0.1
                        tmp_pop.members[j].tree = simplifyTree(tmp_pop.members[j].tree)
                        tmp_pop.members[j].tree = combineOperators(tmp_pop.members[j].tree)
                        if shouldOptimizeConstants
                            tmp_pop.members[j] = optimizeConstants(tmp_pop.members[j])
                        end
                    end
                end
                tmp_pop
            end

            cycles_complete -= 1
        end
        # Yield briefly so polling does not spin the main process.
        sleep(1e-3)
    end
    return nothing
end
846
+
pysr/sr.py CHANGED
@@ -5,7 +5,8 @@ import pathlib
5
  import numpy as np
6
  import pandas as pd
7
 
8
- def pysr(X=None, y=None, weights=None, threads=4,
 
9
  niterations=100,
10
  ncyclesperiteration=300,
11
  binary_operators=["plus", "mult"],
@@ -35,6 +36,7 @@ def pysr(X=None, y=None, weights=None, threads=4,
35
  test='simple1',
36
  verbosity=1e9,
37
  maxsize=20,
 
38
  ):
39
  """Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
40
  Note: most default parameters have been tuned over several example
@@ -43,9 +45,7 @@ def pysr(X=None, y=None, weights=None, threads=4,
43
 
44
  :param X: np.ndarray, 2D array. Rows are examples, columns are features.
45
  :param y: np.ndarray, 1D array. Rows are examples.
46
- :param threads: int, Number of threads (=number of populations running).
47
- You can have more threads than cores - it actually makes it more
48
- efficient.
49
  :param niterations: int, Number of iterations of the algorithm to run. The best
50
  equations are printed, and migrate between populations, at the
51
  end of each.
@@ -91,6 +91,8 @@ def pysr(X=None, y=None, weights=None, threads=4,
91
  (as strings).
92
 
93
  """
 
 
94
 
95
  # Check for potential errors before they happen
96
  assert len(binary_operators) > 0
@@ -155,7 +157,7 @@ const hofMigration = {'true' if hofMigration else 'false'}
155
  const fractionReplacedHof = {fractionReplacedHof}f0
156
  const shouldOptimizeConstants = {'true' if shouldOptimizeConstants else 'false'}
157
  const hofFile = "{equation_file}"
158
- const nthreads = {threads:d}
159
  const nrestarts = {nrestarts:d}
160
  const perturbationFactor = {perturbationFactor:f}f0
161
  const annealing = {"true" if annealing else "false"}
@@ -192,12 +194,18 @@ const weights = convert(Array{Float32, 1}, """f"{weight_str})"
192
  with open(f'/tmp/.dataset_{rand_string}.jl', 'w') as f:
193
  print(def_datasets, file=f)
194
 
 
 
 
 
 
 
 
195
 
196
  command = [
197
  'julia -O3',
198
- '--threads auto',
199
- '-e',
200
- f'\'include("/tmp/.hyperparams_{rand_string}.jl"); include("/tmp/.dataset_{rand_string}.jl"); include("{pkg_directory}/sr.jl"); fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})\'',
201
  ]
202
  if timeout is not None:
203
  command = [f'timeout {timeout}'] + command
 
5
  import numpy as np
6
  import pandas as pd
7
 
8
+ def pysr(X=None, y=None, weights=None,
9
+ procs=4,
10
  niterations=100,
11
  ncyclesperiteration=300,
12
  binary_operators=["plus", "mult"],
 
36
  test='simple1',
37
  verbosity=1e9,
38
  maxsize=20,
39
+ threads=None, #deprecated
40
  ):
41
  """Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
42
  Note: most default parameters have been tuned over several example
 
45
 
46
  :param X: np.ndarray, 2D array. Rows are examples, columns are features.
47
  :param y: np.ndarray, 1D array. Rows are examples.
48
+ :param procs: int, Number of processes (=number of populations running).
 
 
49
  :param niterations: int, Number of iterations of the algorithm to run. The best
50
  equations are printed, and migrate between populations, at the
51
  end of each.
 
91
  (as strings).
92
 
93
  """
94
+ if threads is not None:
95
+ raise ValueError("The threads kwarg is deprecated. Use procs.")
96
 
97
  # Check for potential errors before they happen
98
  assert len(binary_operators) > 0
 
157
  const fractionReplacedHof = {fractionReplacedHof}f0
158
  const shouldOptimizeConstants = {'true' if shouldOptimizeConstants else 'false'}
159
  const hofFile = "{equation_file}"
160
+ const nprocs = {procs:d}
161
  const nrestarts = {nrestarts:d}
162
  const perturbationFactor = {perturbationFactor:f}f0
163
  const annealing = {"true" if annealing else "false"}
 
194
  with open(f'/tmp/.dataset_{rand_string}.jl', 'w') as f:
195
  print(def_datasets, file=f)
196
 
197
+ with open(f'/tmp/.runfile_{rand_string}.jl', 'w') as f:
198
+ print(f'@everywhere include("/tmp/.hyperparams_{rand_string}.jl")', file=f)
199
+ print(f'@everywhere include("/tmp/.dataset_{rand_string}.jl")', file=f)
200
+ print(f'include("{pkg_directory}/sr.jl")', file=f)
201
+ print(f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})', file=f)
202
+ print(f'rmprocs(nprocs)', file=f)
203
+
204
 
205
  command = [
206
  'julia -O3',
207
+ f'-p {procs}',
208
+ f'/tmp/.runfile_{rand_string}.jl',
 
209
  ]
210
  if timeout is not None:
211
  command = [f'timeout {timeout}'] + command