Spaces:
Sleeping
Sleeping
MilesCranmer
committed on
Commit
•
ecc6ae8
1
Parent(s):
d85f644
Can call distributed processing from python
Browse files
- julia/loop.jl +0 -117
- julia/sr.jl +106 -0
- pysr/sr.py +16 -8
julia/loop.jl
DELETED
@@ -1,117 +0,0 @@
|
|
1 |
-
using Distributed
|
2 |
-
const nprocs = 4
|
3 |
-
addprocs(4)
|
4 |
-
@everywhere include(".dataset_28330894764081783777.jl")
|
5 |
-
@everywhere include(".hyperparams_28330894764081783777.jl")
|
6 |
-
@everywhere include("sr.jl")
|
7 |
-
|
8 |
-
|
9 |
-
# 1. Start a population on every process
|
10 |
-
allPops = Future[]
|
11 |
-
bestSubPops = [Population(1) for j=1:nprocs]
|
12 |
-
hallOfFame = HallOfFame()
|
13 |
-
|
14 |
-
for i=1:nprocs
|
15 |
-
npop=300
|
16 |
-
future = @spawnat :any Population(npop, 3)
|
17 |
-
push!(allPops, future)
|
18 |
-
end
|
19 |
-
|
20 |
-
npop=300
|
21 |
-
ncyclesperiteration=3000
|
22 |
-
fractionReplaced=0.1f0
|
23 |
-
verbosity=convert(Int, 1e9)
|
24 |
-
topn=10
|
25 |
-
niterations=10
|
26 |
-
|
27 |
-
|
28 |
-
# # 2. Start the cycle on every process:
|
29 |
-
for i=1:nprocs
|
30 |
-
allPops[i] = @spawnat :any run(fetch(allPops[i]), ncyclesperiteration, verbosity=verbosity)
|
31 |
-
end
|
32 |
-
println("Started!")
|
33 |
-
cycles_complete = nprocs * 10
|
34 |
-
while cycles_complete > 0
|
35 |
-
for i=1:nprocs
|
36 |
-
if isready(allPops[i])
|
37 |
-
cur_pop = fetch(allPops[i])
|
38 |
-
bestSubPops[i] = bestSubPop(cur_pop, topn=topn)
|
39 |
-
|
40 |
-
#Try normal copy...
|
41 |
-
bestPops = Population([member for pop in bestSubPops for member in pop.members])
|
42 |
-
|
43 |
-
for member in cur_pop.members
|
44 |
-
size = countNodes(member.tree)
|
45 |
-
if member.score < hallOfFame.members[size].score
|
46 |
-
hallOfFame.members[size] = deepcopy(member)
|
47 |
-
hallOfFame.exists[size] = true
|
48 |
-
end
|
49 |
-
end
|
50 |
-
|
51 |
-
# Dominating pareto curve - must be better than all simpler equations
|
52 |
-
dominating = PopMember[]
|
53 |
-
open(hofFile, "w") do io
|
54 |
-
debug(verbosity, "\n")
|
55 |
-
debug(verbosity, "Hall of Fame:")
|
56 |
-
debug(verbosity, "-----------------------------------------")
|
57 |
-
debug(verbosity, "Complexity \t MSE \t Equation")
|
58 |
-
println(io,"Complexity|MSE|Equation")
|
59 |
-
for size=1:actualMaxsize
|
60 |
-
if hallOfFame.exists[size]
|
61 |
-
member = hallOfFame.members[size]
|
62 |
-
curMSE = MSE(evalTreeArray(member.tree), y)
|
63 |
-
numberSmallerAndBetter = sum([curMSE > MSE(evalTreeArray(hallOfFame.members[i].tree), y) for i=1:(size-1)])
|
64 |
-
betterThanAllSmaller = (numberSmallerAndBetter == 0)
|
65 |
-
if betterThanAllSmaller
|
66 |
-
debug(verbosity, "$size \t $(curMSE) \t $(stringTree(member.tree))")
|
67 |
-
println(io, "$size|$(curMSE)|$(stringTree(member.tree))")
|
68 |
-
push!(dominating, member)
|
69 |
-
end
|
70 |
-
end
|
71 |
-
end
|
72 |
-
debug(verbosity, "")
|
73 |
-
end
|
74 |
-
|
75 |
-
# Try normal copy otherwise.
|
76 |
-
if migration
|
77 |
-
for k in rand(1:npop, round(Integer, npop*fractionReplaced))
|
78 |
-
to_copy = rand(1:size(bestPops.members)[1])
|
79 |
-
cur_pop.members[k] = PopMember(
|
80 |
-
copyNode(bestPops.members[to_copy].tree),
|
81 |
-
bestPops.members[to_copy].score)
|
82 |
-
end
|
83 |
-
end
|
84 |
-
|
85 |
-
if hofMigration && size(dominating)[1] > 0
|
86 |
-
for k in rand(1:npop, round(Integer, npop*fractionReplacedHof))
|
87 |
-
# Copy in case one gets used twice
|
88 |
-
to_copy = rand(1:size(dominating)[1])
|
89 |
-
cur_pop.members[k] = PopMember(
|
90 |
-
copyNode(dominating[to_copy].tree)
|
91 |
-
)
|
92 |
-
end
|
93 |
-
end
|
94 |
-
|
95 |
-
allPops[i] = @spawnat :any let
|
96 |
-
tmp_pop = run(cur_pop, ncyclesperiteration, verbosity=verbosity)
|
97 |
-
for j=1:tmp_pop.n
|
98 |
-
if rand() < 0.1
|
99 |
-
tmp_pop.members[j].tree = simplifyTree(tmp_pop.members[j].tree)
|
100 |
-
tmp_pop.members[j].tree = combineOperators(tmp_pop.members[j].tree)
|
101 |
-
if shouldOptimizeConstants
|
102 |
-
tmp_pop.members[j] = optimizeConstants(tmp_pop.members[j])
|
103 |
-
end
|
104 |
-
end
|
105 |
-
end
|
106 |
-
tmp_pop
|
107 |
-
end
|
108 |
-
|
109 |
-
global cycles_complete -= 1
|
110 |
-
end
|
111 |
-
end
|
112 |
-
sleep(1e-3)
|
113 |
-
end
|
114 |
-
|
115 |
-
rmprocs(nprocs)
|
116 |
-
|
117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
julia/sr.jl
CHANGED
@@ -738,3 +738,109 @@ mutable struct HallOfFame
|
|
738 |
end
|
739 |
|
740 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
738 |
end
|
739 |
|
740 |
|
741 |
+
function fullRun(niterations::Integer;
|
742 |
+
npop::Integer=300,
|
743 |
+
ncyclesperiteration::Integer=3000,
|
744 |
+
fractionReplaced::Float32=0.1f0,
|
745 |
+
verbosity::Integer=0,
|
746 |
+
topn::Integer=10
|
747 |
+
)
|
748 |
+
# 1. Start a population on every process
|
749 |
+
allPops = Future[]
|
750 |
+
bestSubPops = [Population(1) for j=1:nprocs]
|
751 |
+
hallOfFame = HallOfFame()
|
752 |
+
|
753 |
+
for i=1:nprocs
|
754 |
+
npop=300
|
755 |
+
future = @spawnat :any Population(npop, 3)
|
756 |
+
push!(allPops, future)
|
757 |
+
end
|
758 |
+
|
759 |
+
# # 2. Start the cycle on every process:
|
760 |
+
for i=1:nprocs
|
761 |
+
allPops[i] = @spawnat :any run(fetch(allPops[i]), ncyclesperiteration, verbosity=verbosity)
|
762 |
+
end
|
763 |
+
println("Started!")
|
764 |
+
cycles_complete = nprocs * 10
|
765 |
+
while cycles_complete > 0
|
766 |
+
for i=1:nprocs
|
767 |
+
if isready(allPops[i])
|
768 |
+
cur_pop = fetch(allPops[i])
|
769 |
+
bestSubPops[i] = bestSubPop(cur_pop, topn=topn)
|
770 |
+
|
771 |
+
#Try normal copy...
|
772 |
+
bestPops = Population([member for pop in bestSubPops for member in pop.members])
|
773 |
+
|
774 |
+
for member in cur_pop.members
|
775 |
+
size = countNodes(member.tree)
|
776 |
+
if member.score < hallOfFame.members[size].score
|
777 |
+
hallOfFame.members[size] = deepcopy(member)
|
778 |
+
hallOfFame.exists[size] = true
|
779 |
+
end
|
780 |
+
end
|
781 |
+
|
782 |
+
# Dominating pareto curve - must be better than all simpler equations
|
783 |
+
dominating = PopMember[]
|
784 |
+
open(hofFile, "w") do io
|
785 |
+
debug(verbosity, "\n")
|
786 |
+
debug(verbosity, "Hall of Fame:")
|
787 |
+
debug(verbosity, "-----------------------------------------")
|
788 |
+
debug(verbosity, "Complexity \t MSE \t Equation")
|
789 |
+
println(io,"Complexity|MSE|Equation")
|
790 |
+
for size=1:actualMaxsize
|
791 |
+
if hallOfFame.exists[size]
|
792 |
+
member = hallOfFame.members[size]
|
793 |
+
curMSE = MSE(evalTreeArray(member.tree), y)
|
794 |
+
numberSmallerAndBetter = sum([curMSE > MSE(evalTreeArray(hallOfFame.members[i].tree), y) for i=1:(size-1)])
|
795 |
+
betterThanAllSmaller = (numberSmallerAndBetter == 0)
|
796 |
+
if betterThanAllSmaller
|
797 |
+
debug(verbosity, "$size \t $(curMSE) \t $(stringTree(member.tree))")
|
798 |
+
println(io, "$size|$(curMSE)|$(stringTree(member.tree))")
|
799 |
+
push!(dominating, member)
|
800 |
+
end
|
801 |
+
end
|
802 |
+
end
|
803 |
+
debug(verbosity, "")
|
804 |
+
end
|
805 |
+
|
806 |
+
# Try normal copy otherwise.
|
807 |
+
if migration
|
808 |
+
for k in rand(1:npop, round(Integer, npop*fractionReplaced))
|
809 |
+
to_copy = rand(1:size(bestPops.members)[1])
|
810 |
+
cur_pop.members[k] = PopMember(
|
811 |
+
copyNode(bestPops.members[to_copy].tree),
|
812 |
+
bestPops.members[to_copy].score)
|
813 |
+
end
|
814 |
+
end
|
815 |
+
|
816 |
+
if hofMigration && size(dominating)[1] > 0
|
817 |
+
for k in rand(1:npop, round(Integer, npop*fractionReplacedHof))
|
818 |
+
# Copy in case one gets used twice
|
819 |
+
to_copy = rand(1:size(dominating)[1])
|
820 |
+
cur_pop.members[k] = PopMember(
|
821 |
+
copyNode(dominating[to_copy].tree)
|
822 |
+
)
|
823 |
+
end
|
824 |
+
end
|
825 |
+
|
826 |
+
allPops[i] = @spawnat :any let
|
827 |
+
tmp_pop = run(cur_pop, ncyclesperiteration, verbosity=verbosity)
|
828 |
+
for j=1:tmp_pop.n
|
829 |
+
if rand() < 0.1
|
830 |
+
tmp_pop.members[j].tree = simplifyTree(tmp_pop.members[j].tree)
|
831 |
+
tmp_pop.members[j].tree = combineOperators(tmp_pop.members[j].tree)
|
832 |
+
if shouldOptimizeConstants
|
833 |
+
tmp_pop.members[j] = optimizeConstants(tmp_pop.members[j])
|
834 |
+
end
|
835 |
+
end
|
836 |
+
end
|
837 |
+
tmp_pop
|
838 |
+
end
|
839 |
+
|
840 |
+
cycles_complete -= 1
|
841 |
+
end
|
842 |
+
end
|
843 |
+
sleep(1e-3)
|
844 |
+
end
|
845 |
+
end
|
846 |
+
|
pysr/sr.py
CHANGED
@@ -5,7 +5,8 @@ import pathlib
|
|
5 |
import numpy as np
|
6 |
import pandas as pd
|
7 |
|
8 |
-
def pysr(X=None, y=None, weights=None,
|
|
|
9 |
niterations=100,
|
10 |
ncyclesperiteration=300,
|
11 |
binary_operators=["plus", "mult"],
|
@@ -35,6 +36,7 @@ def pysr(X=None, y=None, weights=None, threads=4,
|
|
35 |
test='simple1',
|
36 |
verbosity=1e9,
|
37 |
maxsize=20,
|
|
|
38 |
):
|
39 |
"""Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
|
40 |
Note: most default parameters have been tuned over several example
|
@@ -43,9 +45,7 @@ def pysr(X=None, y=None, weights=None, threads=4,
|
|
43 |
|
44 |
:param X: np.ndarray, 2D array. Rows are examples, columns are features.
|
45 |
:param y: np.ndarray, 1D array. Rows are examples.
|
46 |
-
:param
|
47 |
-
You can have more threads than cores - it actually makes it more
|
48 |
-
efficient.
|
49 |
:param niterations: int, Number of iterations of the algorithm to run. The best
|
50 |
equations are printed, and migrate between populations, at the
|
51 |
end of each.
|
@@ -91,6 +91,8 @@ def pysr(X=None, y=None, weights=None, threads=4,
|
|
91 |
(as strings).
|
92 |
|
93 |
"""
|
|
|
|
|
94 |
|
95 |
# Check for potential errors before they happen
|
96 |
assert len(binary_operators) > 0
|
@@ -155,7 +157,7 @@ const hofMigration = {'true' if hofMigration else 'false'}
|
|
155 |
const fractionReplacedHof = {fractionReplacedHof}f0
|
156 |
const shouldOptimizeConstants = {'true' if shouldOptimizeConstants else 'false'}
|
157 |
const hofFile = "{equation_file}"
|
158 |
-
const
|
159 |
const nrestarts = {nrestarts:d}
|
160 |
const perturbationFactor = {perturbationFactor:f}f0
|
161 |
const annealing = {"true" if annealing else "false"}
|
@@ -192,12 +194,18 @@ const weights = convert(Array{Float32, 1}, """f"{weight_str})"
|
|
192 |
with open(f'/tmp/.dataset_{rand_string}.jl', 'w') as f:
|
193 |
print(def_datasets, file=f)
|
194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
|
196 |
command = [
|
197 |
'julia -O3',
|
198 |
-
'
|
199 |
-
'
|
200 |
-
f'\'include("/tmp/.hyperparams_{rand_string}.jl"); include("/tmp/.dataset_{rand_string}.jl"); include("{pkg_directory}/sr.jl"); fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})\'',
|
201 |
]
|
202 |
if timeout is not None:
|
203 |
command = [f'timeout {timeout}'] + command
|
|
|
5 |
import numpy as np
|
6 |
import pandas as pd
|
7 |
|
8 |
+
def pysr(X=None, y=None, weights=None,
|
9 |
+
procs=4,
|
10 |
niterations=100,
|
11 |
ncyclesperiteration=300,
|
12 |
binary_operators=["plus", "mult"],
|
|
|
36 |
test='simple1',
|
37 |
verbosity=1e9,
|
38 |
maxsize=20,
|
39 |
+
threads=None, #deprecated
|
40 |
):
|
41 |
"""Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
|
42 |
Note: most default parameters have been tuned over several example
|
|
|
45 |
|
46 |
:param X: np.ndarray, 2D array. Rows are examples, columns are features.
|
47 |
:param y: np.ndarray, 1D array. Rows are examples.
|
48 |
+
:param procs: int, Number of processes (=number of populations running).
|
|
|
|
|
49 |
:param niterations: int, Number of iterations of the algorithm to run. The best
|
50 |
equations are printed, and migrate between populations, at the
|
51 |
end of each.
|
|
|
91 |
(as strings).
|
92 |
|
93 |
"""
|
94 |
+
if threads is not None:
|
95 |
+
raise ValueError("The threads kwarg is deprecated. Use procs.")
|
96 |
|
97 |
# Check for potential errors before they happen
|
98 |
assert len(binary_operators) > 0
|
|
|
157 |
const fractionReplacedHof = {fractionReplacedHof}f0
|
158 |
const shouldOptimizeConstants = {'true' if shouldOptimizeConstants else 'false'}
|
159 |
const hofFile = "{equation_file}"
|
160 |
+
const nprocs = {procs:d}
|
161 |
const nrestarts = {nrestarts:d}
|
162 |
const perturbationFactor = {perturbationFactor:f}f0
|
163 |
const annealing = {"true" if annealing else "false"}
|
|
|
194 |
with open(f'/tmp/.dataset_{rand_string}.jl', 'w') as f:
|
195 |
print(def_datasets, file=f)
|
196 |
|
197 |
+
with open(f'/tmp/.runfile_{rand_string}.jl', 'w') as f:
|
198 |
+
print(f'@everywhere include("/tmp/.hyperparams_{rand_string}.jl")', file=f)
|
199 |
+
print(f'@everywhere include("/tmp/.dataset_{rand_string}.jl")', file=f)
|
200 |
+
print(f'include("{pkg_directory}/sr.jl")', file=f)
|
201 |
+
print(f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})', file=f)
|
202 |
+
print(f'rmprocs(nprocs)', file=f)
|
203 |
+
|
204 |
|
205 |
command = [
|
206 |
'julia -O3',
|
207 |
+
f'-p {procs}',
|
208 |
+
f'/tmp/.runfile_{rand_string}.jl',
|
|
|
209 |
]
|
210 |
if timeout is not None:
|
211 |
command = [f'timeout {timeout}'] + command
|