File size: 10,097 Bytes
cedbbde
0557713
7f6d86d
688106d
48d465b
 
 
 
 
 
 
f50d9d6
48d465b
 
 
21ee78d
48d465b
 
 
b18ab5a
ecc6ae8
 
 
 
 
 
 
de38458
 
 
ecc6ae8
 
7b70a53
 
121e6ac
ecc6ae8
15fbc5f
fe36e3a
1f4e612
fe36e3a
 
ecc6ae8
121e6ac
eefdfef
ecc6ae8
 
 
 
121e6ac
15fbc5f
ecc6ae8
 
121e6ac
4b7293a
 
 
 
 
 
92088a8
 
 
7b70a53
2f6f790
7b70a53
 
 
 
 
92088a8
ecc6ae8
4df12c1
7b70a53
 
 
 
ecc6ae8
 
 
 
 
 
 
15fbc5f
ecc6ae8
 
 
 
 
 
 
 
 
 
 
 
 
7f6d86d
 
 
 
 
22540af
 
7f6d86d
 
 
 
 
 
22540af
 
 
1f4e612
ecc6ae8
 
 
 
 
 
 
ac2e8e0
ecc6ae8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02e2655
eefdfef
15fbc5f
2ca2654
02e2655
 
 
 
 
 
ecc6ae8
 
574628b
02e2655
ecc6ae8
02e2655
ecc6ae8
 
 
fe36e3a
1f4e612
fe36e3a
 
 
 
 
92088a8
ecc6ae8
 
 
92088a8
2f6f790
 
92088a8
2f6f790
 
 
 
 
 
 
7f6d86d
2b01937
db9be07
eebd675
 
 
9fb30b3
 
eebd675
 
 
 
 
2b01937
92088a8
 
 
7f6d86d
 
 
 
 
22540af
 
7f6d86d
 
 
 
 
 
22540af
 
 
1f4e612
92088a8
2b01937
87afd92
 
eebd675
 
 
2b01937
 
92088a8
 
 
 
 
 
 
ecc6ae8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import Optim
import Printf: @printf
import Random: shuffle!, randperm

include("Equation.jl")
include("ProgramConstants.jl")
include("LossFunctions.jl")
include("Utils.jl")
include("EvaluateEquation.jl")
include("MutationFunctions.jl")
include("SimplifyEquation.jl")
include("PopMember.jl")
include("HallOfFame.jl")
include("CheckConstraints.jl")
include("Mutate.jl")
include("Population.jl")
include("RegularizedEvolution.jl")
include("SingleIteration.jl")
include("ConstantOptimization.jl")

# Evaluate the (optionally weighted) MSE of one hall-of-fame member on the
# global dataset `y` (and `weights` when the global `weighted` flag is set).
function _hofMemberMSE(member)
    prediction = evalTreeArray(member.tree)
    if weighted
        return MSE(prediction, y, weights)
    end
    return MSE(prediction, y)
end

# Return the dominating Pareto frontier of `hallOfFame` as a vector of
# `(complexity, mse, member)` tuples ordered by increasing complexity.
# A member is kept only if no *existing* member of smaller complexity has a
# strictly lower MSE. Each existing member is evaluated exactly once.
function _dominatingFrontier(hallOfFame)
    frontier = Tuple{Int,Real,PopMember}[]
    # MSEs of the existing members with smaller complexity than the current one
    smallerMSEs = Real[]
    for complexity in 1:actualMaxsize
        # Slots that were never filled hold placeholders; they must neither be
        # reported nor be allowed to dominate real equations.
        hallOfFame.exists[complexity] || continue
        member = hallOfFame.members[complexity]
        curMSE = _hofMemberMSE(member)
        if !any(smaller -> curMSE > smaller, smallerMSEs)
            push!(frontier, (complexity, curMSE, member))
        end
        push!(smallerMSEs, curMSE)
    end
    return frontier
end

"""
    fullRun(niterations; npop=300, ncyclesperiteration=3000,
            fractionReplaced=0.1f0, verbosity=0, topn=10)

Drive the whole search from the head node.

`npopulations` populations are created and evolved on the available workers
(`@spawnat :any`). Every time one of them finishes a round of
`ncyclesperiteration` cycles, the head node

1. stores the `topn` best members of that population (`bestSubPop`),
2. updates the complexity histogram and the hall of fame,
3. rewrites `hofFile` with the dominating Pareto frontier and copies it to
   `hofFile * ".bkup"`,
4. optionally migrates members into the population (`migration`,
   `hofMigration`), and
5. launches the population again.

The function stops once `npopulations * niterations` finished rounds have been
processed. Roughly every 5 seconds a progress report and the current frontier
are printed when `verbosity > 0`.

# Keywords
- `npop`: size passed to `Population` and the index range used for migration.
- `ncyclesperiteration`: number of cycles handed to `run` per round.
- `fractionReplaced`: fraction of `npop` overwritten by migration from the best
  members of all populations.
- `verbosity`: forwarded to `run`/`debug`; values `> 0` enable progress output.
- `topn`: forwarded to `bestSubPop`.

# Returns
`nothing`; results are written to `hofFile`.
"""
function fullRun(niterations::Integer;
                npop::Integer=300,
                ncyclesperiteration::Integer=3000,
                fractionReplaced::Float32=0.1f0,
                verbosity::Integer=0,
                topn::Integer=10
               )

    testConfiguration()

    # 1. Start a population on every process
    allPops = Future[]
    # One channel per population carries finished populations back to the head node
    channels = [RemoteChannel(1) for j=1:npopulations]
    bestSubPops = [Population(1) for j=1:npopulations]
    hallOfFame = HallOfFame()
    frequencyComplexity = ones(Float32, actualMaxsize)
    # With warm-up enabled the size limit starts small and grows over time.
    curmaxsize = warmupMaxsize == 0 ? maxsize : 3

    for i=1:npopulations
        future = @spawnat :any Population(npop, 3)
        push!(allPops, future)
    end

    # 2. Start the cycle on every process:
    @sync for i=1:npopulations
        @async allPops[i] = @spawnat :any run(fetch(allPops[i]), ncyclesperiteration, curmaxsize, copy(frequencyComplexity)/sum(frequencyComplexity), verbosity=verbosity)
    end
    println("Started!")
    total_cycles = npopulations * niterations
    cycles_remaining = total_cycles
    if warmupMaxsize != 0
        curmaxsize = min(curmaxsize + 1, maxsize)
    end

    last_print_time = time()
    num_equations = 0.0
    print_every_n_seconds = 5
    # For print_every_n_seconds = 5 this gives a 50 second running average
    average_over_m_measurements = 10
    equation_speed = Float32[]

    for i=1:npopulations
        # Start listening for each population to finish:
        @async put!(channels[i], fetch(allPops[i]))
    end

    while cycles_remaining > 0
        # No @inbounds here: the indices below come from tree sizes computed at
        # run time, so bounds checking must stay enabled.
        for i=1:npopulations
            # Non-blocking check if a population is ready:
            isready(channels[i]) || continue

            # The fetch has completed, so this take! does not block
            cur_pop = take!(channels[i])
            bestSubPops[i] = bestSubPop(cur_pop, topn=topn)

            # Pool of the best members of every population, used for migration
            bestPops = Population([member for pop in bestSubPops for member in pop.members])

            for member in cur_pop.members
                complexity = countNodes(member.tree)
                frequencyComplexity[complexity] += 1
                if member.score < hallOfFame.members[complexity].score
                    hallOfFame.members[complexity] = deepcopy(member)
                    hallOfFame.exists[complexity] = true
                end
            end

            # Dominating pareto curve - must be better than all simpler equations
            frontier = _dominatingFrontier(hallOfFame)
            dominating = PopMember[entry[3] for entry in frontier]
            open(hofFile, "w") do io
                println(io,"Complexity|MSE|Equation")
                for (complexity, mse, member) in frontier
                    println(io, "$complexity|$(mse)|$(stringTree(member.tree))")
                end
            end
            cp(hofFile, hofFile*".bkup", force=true)

            # Overwrite random members with copies of the best members overall
            if migration
                for k in rand(1:npop, round(Integer, npop*fractionReplaced))
                    to_copy = rand(1:size(bestPops.members, 1))
                    cur_pop.members[k] = PopMember(
                        copyNode(bestPops.members[to_copy].tree),
                        bestPops.members[to_copy].score)
                end
            end

            # Overwrite random members with copies of hall-of-fame equations
            if hofMigration && !isempty(dominating)
                for k in rand(1:npop, round(Integer, npop*fractionReplacedHof))
                    # Copy in case one gets used twice
                    to_copy = rand(1:length(dominating))
                    cur_pop.members[k] = PopMember(
                       copyNode(dominating[to_copy].tree)
                    )
                end
            end

            # Relaunch the population and listen for its next result
            @async begin
                allPops[i] = @spawnat :any let
                    tmp_pop = run(cur_pop, ncyclesperiteration, curmaxsize, copy(frequencyComplexity)/sum(frequencyComplexity), verbosity=verbosity)
                    # Simplify (and optionally tune constants of) ~10% of the members
                    for j=1:tmp_pop.n
                        if rand() < 0.1
                            tmp_pop.members[j].tree = simplifyTree(tmp_pop.members[j].tree)
                            tmp_pop.members[j].tree = combineOperators(tmp_pop.members[j].tree)
                            if shouldOptimizeConstants
                                tmp_pop.members[j] = optimizeConstants(tmp_pop.members[j])
                            end
                        end
                    end
                    tmp_pop = finalizeScores(tmp_pop)
                    tmp_pop
                end
                put!(channels[i], fetch(allPops[i]))
            end

            cycles_remaining -= 1
            cycles_done = total_cycles - cycles_remaining
            if warmupMaxsize != 0 && cycles_done % warmupMaxsize == 0
                curmaxsize = min(curmaxsize + 1, maxsize)
            end
            num_equations += ncyclesperiteration * npop / 10.0
        end

        sleep(1e-3)
        elapsed = time() - last_print_time
        # Update if time has passed, and some new equations generated.
        if elapsed > print_every_n_seconds && num_equations > 0.0
            current_speed = num_equations/elapsed
            push!(equation_speed, current_speed)
            if length(equation_speed) > average_over_m_measurements
                deleteat!(equation_speed, 1)
            end
            average_speed = sum(equation_speed)/length(equation_speed)

            # The report is the only consumer of the frontier here, so skip the
            # tree evaluations entirely when nothing will be printed.
            if verbosity > 0
                cycles_elapsed = total_cycles - cycles_remaining
                @printf("\n")
                @printf("Cycles per second: %.3e\n", round(average_speed, sigdigits=3))
                @printf("Progress: %d / %d total iterations (%.3f%%)\n", cycles_elapsed, npopulations * niterations, 100.0*cycles_elapsed/(npopulations*niterations))
                @printf("Hall of Fame:\n")
                @printf("-----------------------------------------\n")
                @printf("%-10s  %-8s   %-8s  %-8s\n", "Complexity", "MSE", "Score", "Equation")
                @printf("%-10d  %-8.3e  %-8.3e  %-.f\n", 0, baselineMSE, 0f0, avgy)

                # Score = decrease in log(MSE) per unit of added complexity,
                # relative to the previous entry on the frontier.
                lastMSE = baselineMSE
                lastComplexity = 0
                for (complexity, mse, member) in _dominatingFrontier(hallOfFame)
                    delta_c = complexity - lastComplexity
                    delta_l_mse = log(mse/lastMSE)
                    score = convert(Float32, -delta_l_mse/delta_c)
                    @printf("%-10d  %-8.3e  %-8.3e  %-s\n" , complexity, mse, score, stringTree(member.tree))
                    lastMSE = mse
                    lastComplexity = complexity
                end
            end
            debug(verbosity, "")
            last_print_time = time()
            num_equations = 0.0
        end
    end
    return nothing
end