MilesCranmer committed
Merge pull request #26 from MilesCranmer/separate-packages
Files changed:
- .travis.yml +1 -1
- Project.toml +6 -0
- README.md +5 -12
- TODO.md +2 -0
- julia/CheckConstraints.jl +0 -42
- julia/ConstantOptimization.jl +0 -49
- julia/Equation.jl +0 -173
- julia/EvaluateEquation.jl +0 -47
- julia/LossFunctions.jl +0 -82
- julia/Mutate.jl +0 -124
- julia/MutationFunctions.jl +0 -239
- julia/Operators.jl +0 -56
- julia/PopMember.jl +0 -10
- julia/Population.jl +0 -40
- julia/ProgramConstants.jl +0 -9
- julia/RegularizedEvolution.jl +0 -46
- julia/SimplifyEquation.jl +0 -106
- julia/SingleIteration.jl +0 -28
- julia/Utils.jl +0 -34
- julia/halloffame.jl +0 -8
- julia/sr.jl +0 -218
- julia/truth.jl +0 -77
- julia/truthPops.jl +0 -170
- pysr/sr.py +138 -136
- setup.py +2 -2
- test/test.py +4 -4
.travis.yml
CHANGED
@@ -19,7 +19,7 @@ jobs:
 install: pip3 install --upgrade pip

 before_script:
-  - julia --color=yes -e 'using Pkg; pkg"add
+  - julia --color=yes -e 'using Pkg; pkg"add SymbolicRegression; precompile;"'

 script:
   - pip3 install numpy pandas
Project.toml
ADDED
@@ -0,0 +1,6 @@
+[deps]
+SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
+
+[compat]
+SymbolicRegression = "0.4"
+julia = "1"
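For reference, the new Project.toml can be resolved with Julia's standard package manager. The sketch below uses only stock Pkg calls and is not part of the diff itself:

```julia
# Minimal sketch: resolve the dependency declared above on a local checkout.
using Pkg
Pkg.activate(".")    # use the Project.toml at the repository root
Pkg.instantiate()    # installs a SymbolicRegression version satisfying the "0.4" compat bound
Pkg.precompile()     # optional, mirrors the precompile step in .travis.yml
```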
README.md
CHANGED
@@ -14,7 +14,7 @@ Uses regularized evolution, simulated annealing, and gradient-free optimization.
 [Documentation](https://pysr.readthedocs.io/en/latest)

 Check out [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl) for
-the pure-Julia
+the pure-Julia backend of this package.

 Symbolic regression is a very interpretable machine learning algorithm
 for low-dimensional problems: these tools search equation space
@@ -51,22 +51,15 @@ Install Julia - see [downloads](https://julialang.org/downloads/), and
 then instructions for [mac](https://julialang.org/downloads/platform/#macos)
 and [linux](https://julialang.org/downloads/platform/#linux_and_freebsd).
 (Don't use the `conda-forge` version; it doesn't seem to work properly.)
-Then, at the command line,
-install and precompile the `Optim` and `SpecialFunctions`
-packages via:
-
-```bash
-julia -e 'using Pkg; pkg"add Optim; add SpecialFunctions; precompile;"'
-```
-
-For python, you need to have Python 3, numpy, sympy, and pandas installed.
-
-You can install this package from PyPI with:

+You can install PySR with:
 ```bash
 pip install pysr
 ```

+The first launch will automatically install the Julia packages
+required.
+
 # Quickstart

 Here is some demo code (also found in `example.py`)
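Since the README now points to SymbolicRegression.jl as the pure-Julia backend, a rough sketch of calling that backend directly is shown below. The `Options`/`EquationSearch` names and keyword arguments follow the SymbolicRegression.jl documentation of that era and are an assumption here, not part of this diff:

```julia
# Hedged sketch of driving the new backend directly from Julia (assumed API).
using SymbolicRegression

X = randn(Float32, 5, 100)                 # features are rows in SymbolicRegression.jl
y = 2f0 .* cos.(X[4, :]) .+ X[1, :] .^ 2   # toy target

options = Options(binary_operators=(+, *, /, -), unary_operators=(cos,))
hallOfFame = EquationSearch(X, y, niterations=5, options=options)
```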
TODO.md
CHANGED
@@ -62,6 +62,8 @@
 - [x] Sympy printing
 - [ ] Sort these todo lists by priority

+- [ ] Automatically convert log, log10, log2, pow to the correct operators.
+
 ## Feature ideas

 - [ ] Other default losses (e.g., abs, other likelihoods, or just allow user to pass this as a string).
julia/CheckConstraints.jl
DELETED
@@ -1,42 +0,0 @@
-# Check if any binary operator are overly complex
-function flagBinOperatorComplexity(tree::Node, op::Int)::Bool
-    if tree.degree == 0
-        return false
-    elseif tree.degree == 1
-        return flagBinOperatorComplexity(tree.l, op)
-    else
-        if tree.op == op
-            overly_complex = (
-                ((bin_constraints[op][1] > -1) &&
-                 (countNodes(tree.l) > bin_constraints[op][1]))
-                ||
-                ((bin_constraints[op][2] > -1) &&
-                 (countNodes(tree.r) > bin_constraints[op][2]))
-            )
-            if overly_complex
-                return true
-            end
-        end
-        return (flagBinOperatorComplexity(tree.l, op) || flagBinOperatorComplexity(tree.r, op))
-    end
-end
-
-# Check if any unary operators are overly complex
-function flagUnaOperatorComplexity(tree::Node, op::Int)::Bool
-    if tree.degree == 0
-        return false
-    elseif tree.degree == 1
-        if tree.op == op
-            overly_complex = (
-                (una_constraints[op] > -1) &&
-                (countNodes(tree.l) > una_constraints[op])
-            )
-            if overly_complex
-                return true
-            end
-        end
-        return flagUnaOperatorComplexity(tree.l, op)
-    else
-        return (flagUnaOperatorComplexity(tree.l, op) || flagUnaOperatorComplexity(tree.r, op))
-    end
-end
julia/ConstantOptimization.jl
DELETED
@@ -1,49 +0,0 @@
-import Optim
-
-# Proxy function for optimization
-function optFunc(x::Array{Float32, 1}, tree::Node)::Float32
-    setConstants(tree, x)
-    return scoreFunc(tree)
-end
-
-# Use Nelder-Mead to optimize the constants in an equation
-function optimizeConstants(member::PopMember)::PopMember
-    nconst = countConstants(member.tree)
-    if nconst == 0
-        return member
-    end
-    x0 = getConstants(member.tree)
-    f(x::Array{Float32,1})::Float32 = optFunc(x, member.tree)
-    if size(x0)[1] == 1
-        algorithm = Optim.Newton
-    else
-        algorithm = Optim.NelderMead
-    end
-
-    try
-        result = Optim.optimize(f, x0, algorithm(), Optim.Options(iterations=100))
-        # Try other initial conditions:
-        for i=1:nrestarts
-            tmpresult = Optim.optimize(f, x0 .* (1f0 .+ 5f-1*randn(Float32, size(x0)[1])), algorithm(), Optim.Options(iterations=100))
-            if tmpresult.minimum < result.minimum
-                result = tmpresult
-            end
-        end
-
-        if Optim.converged(result)
-            setConstants(member.tree, result.minimizer)
-            member.score = convert(Float32, result.minimum)
-            member.birth = getTime()
-        else
-            setConstants(member.tree, x0)
-        end
-    catch error
-        # Fine if optimization encountered domain error, just return x0
-        if isa(error, AssertionError)
-            setConstants(member.tree, x0)
-        else
-            throw(error)
-        end
-    end
-    return member
-end
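The constant optimizer above leans on Optim.jl. The standalone sketch below shows just the `Optim.optimize` restart pattern used there; the toy objective and the three restarts are made up for illustration:

```julia
# Nelder-Mead restart pattern from optimizeConstants, on a toy objective.
import Optim

f(x) = (x[1] - 3.0)^2 + (x[2] + 1.0)^2    # stand-in for the tree's loss
x0 = [0.0, 0.0]
result = Optim.optimize(f, x0, Optim.NelderMead(), Optim.Options(iterations=100))
for _ in 1:3                              # a few perturbed restarts, as above
    tmp = Optim.optimize(f, x0 .+ 0.5 .* randn(2), Optim.NelderMead(), Optim.Options(iterations=100))
    if tmp.minimum < result.minimum
        result = tmp
    end
end
Optim.converged(result) && println(Optim.minimizer(result))   # ≈ [3.0, -1.0]
```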
julia/Equation.jl
DELETED
@@ -1,173 +0,0 @@
-# Define a serialization format for the symbolic equations:
-mutable struct Node
-    #Holds operators, variables, constants in a tree
-    degree::Integer #0 for constant/variable, 1 for cos/sin, 2 for +/* etc.
-    val::Union{Float32, Integer} #Either const value, or enumerates variable
-    constant::Bool #false if variable
-    op::Integer #enumerates operator (separately for degree=1,2)
-    l::Union{Node, Nothing}
-    r::Union{Node, Nothing}
-
-    Node(val::Float32) = new(0, val, true, 1, nothing, nothing)
-    Node(val::Integer) = new(0, val, false, 1, nothing, nothing)
-    Node(op::Integer, l::Node) = new(1, 0.0f0, false, op, l, nothing)
-    Node(op::Integer, l::Union{Float32, Integer}) = new(1, 0.0f0, false, op, Node(l), nothing)
-    Node(op::Integer, l::Node, r::Node) = new(2, 0.0f0, false, op, l, r)
-
-    #Allow to pass the leaf value without additional node call:
-    Node(op::Integer, l::Union{Float32, Integer}, r::Node) = new(2, 0.0f0, false, op, Node(l), r)
-    Node(op::Integer, l::Node, r::Union{Float32, Integer}) = new(2, 0.0f0, false, op, l, Node(r))
-    Node(op::Integer, l::Union{Float32, Integer}, r::Union{Float32, Integer}) = new(2, 0.0f0, false, op, Node(l), Node(r))
-end
-
-# Copy an equation (faster than deepcopy)
-function copyNode(tree::Node)::Node
-    if tree.degree == 0
-        return Node(tree.val)
-    elseif tree.degree == 1
-        return Node(tree.op, copyNode(tree.l))
-    else
-        return Node(tree.op, copyNode(tree.l), copyNode(tree.r))
-    end
-end
-
-# Count the operators, constants, variables in an equation
-function countNodes(tree::Node)::Integer
-    if tree.degree == 0
-        return 1
-    elseif tree.degree == 1
-        return 1 + countNodes(tree.l)
-    else
-        return 1 + countNodes(tree.l) + countNodes(tree.r)
-    end
-end
-
-# Count the max depth of a tree
-function countDepth(tree::Node)::Integer
-    if tree.degree == 0
-        return 1
-    elseif tree.degree == 1
-        return 1 + countDepth(tree.l)
-    else
-        return 1 + max(countDepth(tree.l), countDepth(tree.r))
-    end
-end
-
-# Convert an equation to a string
-function stringTree(tree::Node)::String
-    if tree.degree == 0
-        if tree.constant
-            return string(tree.val)
-        else
-            if useVarMap
-                return varMap[tree.val]
-            else
-                return "x$(tree.val - 1)"
-            end
-        end
-    elseif tree.degree == 1
-        return "$(unaops[tree.op])($(stringTree(tree.l)))"
-    else
-        return "$(binops[tree.op])($(stringTree(tree.l)), $(stringTree(tree.r)))"
-    end
-end
-
-# Print an equation
-function printTree(tree::Node)
-    println(stringTree(tree))
-end
-
-# Return a random node from the tree
-function randomNode(tree::Node)::Node
-    if tree.degree == 0
-        return tree
-    end
-    a = countNodes(tree)
-    b = 0
-    c = 0
-    if tree.degree >= 1
-        b = countNodes(tree.l)
-    end
-    if tree.degree == 2
-        c = countNodes(tree.r)
-    end
-
-    i = rand(1:1+b+c)
-    if i <= b
-        return randomNode(tree.l)
-    elseif i == b + 1
-        return tree
-    end
-
-    return randomNode(tree.r)
-end
-
-# Count the number of unary operators in the equation
-function countUnaryOperators(tree::Node)::Integer
-    if tree.degree == 0
-        return 0
-    elseif tree.degree == 1
-        return 1 + countUnaryOperators(tree.l)
-    else
-        return 0 + countUnaryOperators(tree.l) + countUnaryOperators(tree.r)
-    end
-end
-
-# Count the number of binary operators in the equation
-function countBinaryOperators(tree::Node)::Integer
-    if tree.degree == 0
-        return 0
-    elseif tree.degree == 1
-        return 0 + countBinaryOperators(tree.l)
-    else
-        return 1 + countBinaryOperators(tree.l) + countBinaryOperators(tree.r)
-    end
-end
-
-# Count the number of operators in the equation
-function countOperators(tree::Node)::Integer
-    return countUnaryOperators(tree) + countBinaryOperators(tree)
-end
-
-
-# Count the number of constants in an equation
-function countConstants(tree::Node)::Integer
-    if tree.degree == 0
-        return convert(Integer, tree.constant)
-    elseif tree.degree == 1
-        return 0 + countConstants(tree.l)
-    else
-        return 0 + countConstants(tree.l) + countConstants(tree.r)
-    end
-end
-
-# Get all the constants from a tree
-function getConstants(tree::Node)::Array{Float32, 1}
-    if tree.degree == 0
-        if tree.constant
-            return [tree.val]
-        else
-            return Float32[]
-        end
-    elseif tree.degree == 1
-        return getConstants(tree.l)
-    else
-        both = [getConstants(tree.l), getConstants(tree.r)]
-        return [constant for subtree in both for constant in subtree]
-    end
-end
-
-# Set all the constants inside a tree
-function setConstants(tree::Node, constants::Array{Float32, 1})
-    if tree.degree == 0
-        if tree.constant
-            tree.val = constants[1]
-        end
-    elseif tree.degree == 1
-        setConstants(tree.l, constants)
-    else
-        numberLeft = countConstants(tree.l)
-        setConstants(tree.l, constants)
-        setConstants(tree.r, constants[numberLeft+1:end])
-    end
-end
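To make the Node layout concrete, here is a small hedged example of building and inspecting a tree with the constructors above. It assumes a checkout from before this PR (the file is deleted here) and the handful of globals that Equation.jl expects; the operator tables below are placeholders normally set up by sr.jl:

```julia
# Build mult(cos(x0), 3.2) by hand using the deleted Equation.jl (pre-PR checkout).
const binops = [+, *]        # assumed operator tables
const unaops = [cos]
const useVarMap = false
include("julia/Equation.jl")

tree = Node(2, Node(1, Node(1)), Node(3.2f0))   # op 2 = *, op 1 = cos, variable 1, constant 3.2
println(countNodes(tree))    # 4
println(countDepth(tree))    # 3
printTree(tree)              # *(cos(x0), 3.2)
```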
julia/EvaluateEquation.jl
DELETED
@@ -1,47 +0,0 @@
-# Evaluate an equation over an array of datapoints
-function evalTreeArray(tree::Node)::Union{Array{Float32, 1}, Nothing}
-    return evalTreeArray(tree, X)
-end
-
-
-# Evaluate an equation over an array of datapoints
-function evalTreeArray(tree::Node, cX::Array{Float32, 2})::Union{Array{Float32, 1}, Nothing}
-    clen = size(cX)[1]
-    if tree.degree == 0
-        if tree.constant
-            return fill(tree.val, clen)
-        else
-            return copy(cX[:, tree.val])
-        end
-    elseif tree.degree == 1
-        cumulator = evalTreeArray(tree.l, cX)
-        if cumulator === nothing
-            return nothing
-        end
-        op_idx = tree.op
-        UNAOP!(cumulator, op_idx, clen)
-        @inbounds for i=1:clen
-            if isinf(cumulator[i]) || isnan(cumulator[i])
-                return nothing
-            end
-        end
-        return cumulator
-    else
-        cumulator = evalTreeArray(tree.l, cX)
-        if cumulator === nothing
-            return nothing
-        end
-        array2 = evalTreeArray(tree.r, cX)
-        if array2 === nothing
-            return nothing
-        end
-        op_idx = tree.op
-        BINOP!(cumulator, array2, op_idx, clen)
-        @inbounds for i=1:clen
-            if isinf(cumulator[i]) || isnan(cumulator[i])
-                return nothing
-            end
-        end
-        return cumulator
-    end
-end
julia/LossFunctions.jl
DELETED
@@ -1,82 +0,0 @@
-import Random: randperm
-
-# Sum of square error between two arrays
-function SSE(x::Array{Float32}, y::Array{Float32})::Float32
-    diff = (x - y)
-    return sum(diff .* diff)
-end
-function SSE(x::Nothing, y::Array{Float32})::Float32
-    return 1f9
-end
-
-# Sum of square error between two arrays, with weights
-function SSE(x::Array{Float32}, y::Array{Float32}, w::Array{Float32})::Float32
-    diff = (x - y)
-    return sum(diff .* diff .* w)
-end
-function SSE(x::Nothing, y::Array{Float32}, w::Array{Float32})::Float32
-    return Nothing
-end
-
-# Mean of square error between two arrays
-function MSE(x::Nothing, y::Array{Float32})::Float32
-    return 1f9
-end
-
-# Mean of square error between two arrays
-function MSE(x::Array{Float32}, y::Array{Float32})::Float32
-    return SSE(x, y)/size(x)[1]
-end
-
-# Mean of square error between two arrays
-function MSE(x::Nothing, y::Array{Float32}, w::Array{Float32})::Float32
-    return 1f9
-end
-
-# Mean of square error between two arrays
-function MSE(x::Array{Float32}, y::Array{Float32}, w::Array{Float32})::Float32
-    return SSE(x, y, w)/sum(w)
-end
-
-if weighted
-    const avgy = sum(y .* weights)/sum(weights)
-    const baselineMSE = MSE(y, convert(Array{Float32, 1}, ones(len) .* avgy), weights)
-else
-    const avgy = sum(y)/len
-    const baselineMSE = MSE(y, convert(Array{Float32, 1}, ones(len) .* avgy))
-end
-
-# Score an equation
-function scoreFunc(tree::Node)::Float32
-    prediction = evalTreeArray(tree)
-    if prediction === nothing
-        return 1f9
-    end
-    if weighted
-        mse = MSE(prediction, y, weights)
-    else
-        mse = MSE(prediction, y)
-    end
-    return mse / baselineMSE + countNodes(tree)*parsimony
-end
-
-# Score an equation with a small batch
-function scoreFuncBatch(tree::Node)::Float32
-    # batchSize
-    batch_idx = randperm(len)[1:batchSize]
-    batch_X = X[batch_idx, :]
-    prediction = evalTreeArray(tree, batch_X)
-    if prediction === nothing
-        return 1f9
-    end
-    size_adjustment = 1f0
-    batch_y = y[batch_idx]
-    if weighted
-        batch_w = weights[batch_idx]
-        mse = MSE(prediction, batch_y, batch_w)
-        size_adjustment = 1f0 * len / batchSize
-    else
-        mse = MSE(prediction, batch_y)
-    end
-    return size_adjustment * mse / baselineMSE + countNodes(tree)*parsimony
-end
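As a quick worked example of the score used above (numbers invented for illustration): a tree's MSE is normalized by the baseline MSE of always predicting the mean, then a complexity penalty of `parsimony` per node is added.

```julia
# Hypothetical numbers, just to show how scoreFunc combines its terms.
mse, baselineMSE, parsimony, nnodes = 0.02f0, 0.40f0, 1f-3, 7
score = mse / baselineMSE + nnodes * parsimony   # 0.05 + 0.007 ≈ 0.057f0
```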
julia/Mutate.jl
DELETED
@@ -1,124 +0,0 @@
-# Go through one mutation cycle
-function iterate(member::PopMember, T::Float32, curmaxsize::Integer, frequencyComplexity::Array{Float32, 1})::PopMember
-    prev = member.tree
-    tree = prev
-    #TODO - reconsider this
-    if batching
-        beforeLoss = scoreFuncBatch(prev)
-    else
-        beforeLoss = member.score
-    end
-
-    mutationChoice = rand()
-    #More constants => more likely to do constant mutation
-    weightAdjustmentMutateConstant = min(8, countConstants(prev))/8.0
-    cur_weights = copy(mutationWeights) .* 1.0
-    cur_weights[1] *= weightAdjustmentMutateConstant
-    n = countNodes(prev)
-    depth = countDepth(prev)
-
-    # If equation too big, don't add new operators
-    if n >= curmaxsize || depth >= maxdepth
-        cur_weights[3] = 0.0
-        cur_weights[4] = 0.0
-    end
-    cur_weights /= sum(cur_weights)
-    cweights = cumsum(cur_weights)
-
-    successful_mutation = false
-    #TODO: Currently we dont take this \/ into account
-    is_success_always_possible = true
-    attempts = 0
-    max_attempts = 10
-
-    #############################################
-    # Mutations
-    #############################################
-    while (!successful_mutation) && attempts < max_attempts
-        tree = copyNode(prev)
-        successful_mutation = true
-        if mutationChoice < cweights[1]
-            tree = mutateConstant(tree, T)
-
-            is_success_always_possible = true
-            # Mutating a constant shouldn't invalidate an already-valid function
-
-        elseif mutationChoice < cweights[2]
-            tree = mutateOperator(tree)
-
-            is_success_always_possible = true
-            # Can always mutate to the same operator
-
-        elseif mutationChoice < cweights[3]
-            if rand() < 0.5
-                tree = appendRandomOp(tree)
-            else
-                tree = prependRandomOp(tree)
-            end
-            is_success_always_possible = false
-            # Can potentially have a situation without success
-        elseif mutationChoice < cweights[4]
-            tree = insertRandomOp(tree)
-            is_success_always_possible = false
-        elseif mutationChoice < cweights[5]
-            tree = deleteRandomOp(tree)
-            is_success_always_possible = true
-        elseif mutationChoice < cweights[6]
-            tree = simplifyTree(tree) # Sometimes we simplify tree
-            tree = combineOperators(tree) # See if repeated constants at outer levels
-            return PopMember(tree, beforeLoss)
-
-            is_success_always_possible = true
-            # Simplification shouldn't hurt complexity; unless some non-symmetric constraint
-            # to commutative operator...
-
-        elseif mutationChoice < cweights[7]
-            tree = genRandomTree(5) # Sometimes we generate a new tree completely tree
-
-            is_success_always_possible = true
-        else # no mutation applied
-            return PopMember(tree, beforeLoss)
-        end
-
-        # Check for illegal equations
-        for i=1:nbin
-            if successful_mutation && flagBinOperatorComplexity(tree, i)
-                successful_mutation = false
-            end
-        end
-        for i=1:nuna
-            if successful_mutation && flagUnaOperatorComplexity(tree, i)
-                successful_mutation = false
-            end
-        end
-
-        attempts += 1
-    end
-    #############################################
-
-    if !successful_mutation
-        return PopMember(copyNode(prev), beforeLoss)
-    end
-
-    if batching
-        afterLoss = scoreFuncBatch(tree)
-    else
-        afterLoss = scoreFunc(tree)
-    end
-
-    if annealing
-        delta = afterLoss - beforeLoss
-        probChange = exp(-delta/(T*alpha))
-        if useFrequency
-            oldSize = countNodes(prev)
-            newSize = countNodes(tree)
-            probChange *= frequencyComplexity[oldSize] / frequencyComplexity[newSize]
-        end
-
-        return_unaltered = (isnan(afterLoss) || probChange < rand())
-        if return_unaltered
-            return PopMember(copyNode(prev), beforeLoss)
-        end
-    end
-    return PopMember(tree, afterLoss)
-end
julia/MutationFunctions.jl
DELETED
@@ -1,239 +0,0 @@
-# Randomly convert an operator into another one (binary->binary;
-# unary->unary)
-function mutateOperator(tree::Node)::Node
-    if countOperators(tree) == 0
-        return tree
-    end
-    node = randomNode(tree)
-    while node.degree == 0
-        node = randomNode(tree)
-    end
-    if node.degree == 1
-        node.op = rand(1:length(unaops))
-    else
-        node.op = rand(1:length(binops))
-    end
-    return tree
-end
-
-# Randomly perturb a constant
-function mutateConstant(
-        tree::Node, T::Float32,
-        probNegate::Float32=0.01f0)::Node
-    # T is between 0 and 1.
-
-    if countConstants(tree) == 0
-        return tree
-    end
-    node = randomNode(tree)
-    while node.degree != 0 || node.constant == false
-        node = randomNode(tree)
-    end
-
-    bottom = 0.1f0
-    maxChange = perturbationFactor * T + 1.0f0 + bottom
-    factor = maxChange^Float32(rand())
-    makeConstBigger = rand() > 0.5
-
-    if makeConstBigger
-        node.val *= factor
-    else
-        node.val /= factor
-    end
-
-    if rand() > probNegate
-        node.val *= -1
-    end
-
-    return tree
-end
-
-# Add a random unary/binary operation to the end of a tree
-function appendRandomOp(tree::Node)::Node
-    node = randomNode(tree)
-    while node.degree != 0
-        node = randomNode(tree)
-    end
-
-    choice = rand()
-    makeNewBinOp = choice < nbin/nops
-    if rand() > 0.5
-        left = Float32(randn())
-    else
-        left = rand(1:nvar)
-    end
-    if rand() > 0.5
-        right = Float32(randn())
-    else
-        right = rand(1:nvar)
-    end
-
-    if makeNewBinOp
-        newnode = Node(
-            rand(1:length(binops)),
-            left,
-            right
-        )
-    else
-        newnode = Node(
-            rand(1:length(unaops)),
-            left
-        )
-    end
-    node.l = newnode.l
-    node.r = newnode.r
-    node.op = newnode.op
-    node.degree = newnode.degree
-    node.val = newnode.val
-    node.constant = newnode.constant
-    return tree
-end
-
-# Insert random node
-function insertRandomOp(tree::Node)::Node
-    node = randomNode(tree)
-    choice = rand()
-    makeNewBinOp = choice < nbin/nops
-    left = copyNode(node)
-
-    if makeNewBinOp
-        right = randomConstantNode()
-        newnode = Node(
-            rand(1:length(binops)),
-            left,
-            right
-        )
-    else
-        newnode = Node(
-            rand(1:length(unaops)),
-            left
-        )
-    end
-    node.l = newnode.l
-    node.r = newnode.r
-    node.op = newnode.op
-    node.degree = newnode.degree
-    node.val = newnode.val
-    node.constant = newnode.constant
-    return tree
-end
-
-# Add random node to the top of a tree
-function prependRandomOp(tree::Node)::Node
-    node = tree
-    choice = rand()
-    makeNewBinOp = choice < nbin/nops
-    left = copyNode(tree)
-
-    if makeNewBinOp
-        right = randomConstantNode()
-        newnode = Node(
-            rand(1:length(binops)),
-            left,
-            right
-        )
-    else
-        newnode = Node(
-            rand(1:length(unaops)),
-            left
-        )
-    end
-    node.l = newnode.l
-    node.r = newnode.r
-    node.op = newnode.op
-    node.degree = newnode.degree
-    node.val = newnode.val
-    node.constant = newnode.constant
-    return node
-end
-
-function randomConstantNode()::Node
-    if rand() > 0.5
-        val = Float32(randn())
-    else
-        val = rand(1:nvar)
-    end
-    newnode = Node(val)
-    return newnode
-end
-
-# Return a random node from the tree with parent
-function randomNodeAndParent(tree::Node, parent::Union{Node, Nothing})::Tuple{Node, Union{Node, Nothing}}
-    if tree.degree == 0
-        return tree, parent
-    end
-    a = countNodes(tree)
-    b = 0
-    c = 0
-    if tree.degree >= 1
-        b = countNodes(tree.l)
-    end
-    if tree.degree == 2
-        c = countNodes(tree.r)
-    end
-
-    i = rand(1:1+b+c)
-    if i <= b
-        return randomNodeAndParent(tree.l, tree)
-    elseif i == b + 1
-        return tree, parent
-    end
-
-    return randomNodeAndParent(tree.r, tree)
-end
-
-# Select a random node, and replace it an the subtree
-# with a variable or constant
-function deleteRandomOp(tree::Node)::Node
-    node, parent = randomNodeAndParent(tree, nothing)
-    isroot = (parent === nothing)
-
-    if node.degree == 0
-        # Replace with new constant
-        newnode = randomConstantNode()
-        node.l = newnode.l
-        node.r = newnode.r
-        node.op = newnode.op
-        node.degree = newnode.degree
-        node.val = newnode.val
-        node.constant = newnode.constant
-    elseif node.degree == 1
-        # Join one of the children with the parent
-        if isroot
-            return node.l
-        elseif parent.l == node
-            parent.l = node.l
-        else
-            parent.r = node.l
-        end
-    else
-        # Join one of the children with the parent
-        if rand() < 0.5
-            if isroot
-                return node.l
-            elseif parent.l == node
-                parent.l = node.l
-            else
-                parent.r = node.l
-            end
-        else
-            if isroot
-                return node.r
-            elseif parent.l == node
-                parent.l = node.r
-            else
-                parent.r = node.r
-            end
-        end
-    end
-    return tree
-end
-
-# Create a random equation by appending random operators
-function genRandomTree(length::Integer)::Node
-    tree = Node(1.0f0)
-    for i=1:length
-        tree = appendRandomOp(tree)
-    end
-    return tree
-end
julia/Operators.jl
DELETED
@@ -1,56 +0,0 @@
-import SpecialFunctions: gamma, lgamma, erf, erfc, beta
-
-
-import Base.FastMath: sqrt_llvm_fast, neg_float_fast,
-    add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast,
-    eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast,
-    sign_fast, abs_fast, log_fast, log2_fast, log10_fast, sqrt_fast,
-    pow_fast
-
-# Implicitly defined:
-#binary: mod
-#unary: exp, abs, log1p, sin, cos, tan, sinh, cosh, tanh, asin, acos, atan, asinh, acosh, atanh, erf, erfc, gamma, relu, round, floor, ceil, round, sign.
-
-# Use some fast operators from https://github.com/JuliaLang/julia/blob/81597635c4ad1e8c2e1c5753fda4ec0e7397543f/base/fastmath.jl
-# Define allowed operators. Any julia operator can also be used.
-plus(x::Float32, y::Float32)::Float32 = add_float_fast(x, y) #Do not change the name of this operator.
-sub(x::Float32, y::Float32)::Float32 = sub_float_fast(x, y) #Do not change the name of this operator.
-mult(x::Float32, y::Float32)::Float32 = mul_float_fast(x, y) #Do not change the name of this operator.
-square(x::Float32)::Float32 = mul_float_fast(x, x)
-cube(x::Float32)::Float32 = mul_float_fast(mul_float_fast(x, x), x)
-pow(x::Float32, y::Float32)::Float32 = sign_fast(x)*pow_fast(abs(x), y)
-div(x::Float32, y::Float32)::Float32 = div_float_fast(x, y)
-logm(x::Float32)::Float32 = log_fast(abs_fast(x) + 1f-8)
-logm2(x::Float32)::Float32 = log2_fast(abs_fast(x) + 1f-8)
-logm10(x::Float32)::Float32 = log10_fast(abs_fast(x) + 1f-8)
-sqrtm(x::Float32)::Float32 = sqrt_fast(abs_fast(x))
-neg(x::Float32)::Float32 = neg_float_fast(x)
-
-function greater(x::Float32, y::Float32)::Float32
-    if x > y
-        return 1f0
-    end
-    return 0f0
-end
-
-function relu(x::Float32)::Float32
-    if x > 0f0
-        return x
-    end
-    return 0f0
-end
-
-function logical_or(x::Float32, y::Float32)::Float32
-    if x > 0f0 || y > 0f0
-        return 1f0
-    end
-    return 0f0
-end
-
-# (Just use multiplication normally)
-function logical_and(x::Float32, y::Float32)::Float32
-    if x > 0f0 && y > 0f0
-        return 1f0
-    end
-    return 0f0
-end
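Since the file notes that "any julia operator can also be used", a custom operator is just another Float32 -> Float32 function in the same protected style as `logm`/`sqrtm` above. The one below is a hypothetical example, not part of the original operator set:

```julia
# Hypothetical extra operator in the style of logm/sqrtm above:
# divides by |y| + 1f-8, which avoids division by zero (note it drops the sign of y).
safe_div(x::Float32, y::Float32)::Float32 = x / (abs(y) + 1f-8)
```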
julia/PopMember.jl
DELETED
@@ -1,10 +0,0 @@
-# Define a member of population by equation, score, and age
-mutable struct PopMember
-    tree::Node
-    score::Float32
-    birth::Integer
-
-    PopMember(t::Node) = new(t, scoreFunc(t), getTime())
-    PopMember(t::Node, score::Float32) = new(t, score, getTime())
-
-end
julia/Population.jl
DELETED
@@ -1,40 +0,0 @@
-# A list of members of the population, with easy constructors,
-# which allow for random generation of new populations
-mutable struct Population
-    members::Array{PopMember, 1}
-    n::Integer
-
-    Population(pop::Array{PopMember, 1}) = new(pop, size(pop)[1])
-    Population(npop::Integer) = new([PopMember(genRandomTree(3)) for i=1:npop], npop)
-    Population(npop::Integer, nlength::Integer) = new([PopMember(genRandomTree(nlength)) for i=1:npop], npop)
-
-end
-
-# Sample 10 random members of the population, and make a new one
-function samplePop(pop::Population)::Population
-    idx = rand(1:pop.n, ns)
-    return Population(pop.members[idx])
-end
-
-# Sample the population, and get the best member from that sample
-function bestOfSample(pop::Population)::PopMember
-    sample = samplePop(pop)
-    best_idx = argmin([sample.members[member].score for member=1:sample.n])
-    return sample.members[best_idx]
-end
-
-function finalizeScores(pop::Population)::Population
-    need_recalculate = batching
-    if need_recalculate
-        @inbounds @simd for member=1:pop.n
-            pop.members[member].score = scoreFunc(pop.members[member].tree)
-        end
-    end
-    return pop
-end
-
-# Return best 10 examples
-function bestSubPop(pop::Population; topn::Integer=10)::Population
-    best_idx = sortperm([pop.members[member].score for member=1:pop.n])
-    return Population(pop.members[best_idx[1:topn]])
-end
julia/ProgramConstants.jl
DELETED
@@ -1,9 +0,0 @@
-
-const maxdegree = 2
-const actualMaxsize = maxsize + maxdegree
-const len = size(X)[1]
-
-const nuna = size(unaops)[1]
-const nbin = size(binops)[1]
-const nops = nuna + nbin
-const nvar = size(X)[2];
julia/RegularizedEvolution.jl
DELETED
@@ -1,46 +0,0 @@
-import Random: shuffle!
-
-# Pass through the population several times, replacing the oldest
-# with the fittest of a small subsample
-function regEvolCycle(pop::Population, T::Float32, curmaxsize::Integer,
-        frequencyComplexity::Array{Float32, 1})::Population
-    # Batch over each subsample. Can give 15% improvement in speed; probably moreso for large pops.
-    # but is ultimately a different algorithm than regularized evolution, and might not be
-    # as good.
-    if fast_cycle
-        shuffle!(pop.members)
-        n_evol_cycles = round(Integer, pop.n/ns)
-        babies = Array{PopMember}(undef, n_evol_cycles)
-
-        # Iterate each ns-member sub-sample
-        @inbounds Threads.@threads for i=1:n_evol_cycles
-            best_score = Inf32
-            best_idx = 1+(i-1)*ns
-            # Calculate best member of the subsample:
-            for sub_i=1+(i-1)*ns:i*ns
-                if pop.members[sub_i].score < best_score
-                    best_score = pop.members[sub_i].score
-                    best_idx = sub_i
-                end
-            end
-            allstar = pop.members[best_idx]
-            babies[i] = iterate(allstar, T, curmaxsize, frequencyComplexity)
-        end
-
-        # Replace the n_evol_cycles-oldest members of each population
-        @inbounds for i=1:n_evol_cycles
-            oldest = argmin([pop.members[member].birth for member=1:pop.n])
-            pop.members[oldest] = babies[i]
-        end
-    else
-        for i=1:round(Integer, pop.n/ns)
-            allstar = bestOfSample(pop)
-            baby = iterate(allstar, T, curmaxsize, frequencyComplexity)
-            #printTree(baby.tree)
-            oldest = argmin([pop.members[member].birth for member=1:pop.n])
-            pop.members[oldest] = baby
-        end
-    end
-
-    return pop
-end
julia/SimplifyEquation.jl
DELETED
@@ -1,106 +0,0 @@
-# Simplify tree
-function combineOperators(tree::Node)::Node
-    # NOTE: (const (+*-) const) already accounted for. Call simplifyTree before.
-    # ((const + var) + const) => (const + var)
-    # ((const * var) * const) => (const * var)
-    # ((const - var) - const) => (const - var)
-    # (want to add anything commutative!)
-    # TODO - need to combine plus/sub if they are both there.
-    if tree.degree == 0
-        return tree
-    elseif tree.degree == 1
-        tree.l = combineOperators(tree.l)
-    elseif tree.degree == 2
-        tree.l = combineOperators(tree.l)
-        tree.r = combineOperators(tree.r)
-    end
-
-    top_level_constant = tree.degree == 2 && (tree.l.constant || tree.r.constant)
-    if tree.degree == 2 && (binops[tree.op] === mult || binops[tree.op] === plus) && top_level_constant
-        op = tree.op
-        # Put the constant in r. Need to assume var in left for simplification assumption.
-        if tree.l.constant
-            tmp = tree.r
-            tree.r = tree.l
-            tree.l = tmp
-        end
-        topconstant = tree.r.val
-        # Simplify down first
-        below = tree.l
-        if below.degree == 2 && below.op == op
-            if below.l.constant
-                tree = below
-                tree.l.val = binops[op](tree.l.val, topconstant)
-            elseif below.r.constant
-                tree = below
-                tree.r.val = binops[op](tree.r.val, topconstant)
-            end
-        end
-    end
-
-    if tree.degree == 2 && binops[tree.op] === sub && top_level_constant
-        # Currently just simplifies subtraction. (can't assume both plus and sub are operators)
-        # Not commutative, so use different op.
-        if tree.l.constant
-            if tree.r.degree == 2 && binops[tree.r.op] === sub
-                if tree.r.l.constant
-                    #(const - (const - var)) => (var - const)
-                    l = tree.l
-                    r = tree.r
-                    simplified_const = -(l.val - r.l.val) #neg(sub(l.val, r.l.val))
-                    tree.l = tree.r.r
-                    tree.r = l
-                    tree.r.val = simplified_const
-                elseif tree.r.r.constant
-                    #(const - (var - const)) => (const - var)
-                    l = tree.l
-                    r = tree.r
-                    simplified_const = l.val + r.r.val #plus(l.val, r.r.val)
-                    tree.r = tree.r.l
-                    tree.l.val = simplified_const
-                end
-            end
-        else #tree.r.constant is true
-            if tree.l.degree == 2 && binops[tree.l.op] === sub
-                if tree.l.l.constant
-                    #((const - var) - const) => (const - var)
-                    l = tree.l
-                    r = tree.r
-                    simplified_const = l.l.val - r.val #sub(l.l.val, r.val)
-                    tree.r = tree.l.r
-                    tree.l = r
-                    tree.l.val = simplified_const
-                elseif tree.l.r.constant
-                    #((var - const) - const) => (var - const)
-                    l = tree.l
-                    r = tree.r
-                    simplified_const = r.val + l.r.val #plus(r.val, l.r.val)
-                    tree.l = tree.l.l
-                    tree.r.val = simplified_const
-                end
-            end
-        end
-    end
-    return tree
-end
-
-# Simplify tree
-function simplifyTree(tree::Node)::Node
-    if tree.degree == 1
-        tree.l = simplifyTree(tree.l)
-        if tree.l.degree == 0 && tree.l.constant
-            return Node(unaops[tree.op](tree.l.val))
-        end
-    elseif tree.degree == 2
-        tree.l = simplifyTree(tree.l)
-        tree.r = simplifyTree(tree.r)
-        constantsBelow = (
-            tree.l.degree == 0 && tree.l.constant &&
-            tree.r.degree == 0 && tree.r.constant
-        )
-        if constantsBelow
-            return Node(binops[tree.op](tree.l.val, tree.r.val))
-        end
-    end
-    return tree
-end
julia/SingleIteration.jl
DELETED
@@ -1,28 +0,0 @@
-# Cycle through regularized evolution many times,
-# printing the fittest equation every 10% through
-function run(
-        pop::Population,
-        ncycles::Integer,
-        curmaxsize::Integer,
-        frequencyComplexity::Array{Float32, 1};
-        verbosity::Integer=0
-       )::Population
-
-    allT = LinRange(1.0f0, 0.0f0, ncycles)
-    for iT in 1:size(allT)[1]
-        if annealing
-            pop = regEvolCycle(pop, allT[iT], curmaxsize, frequencyComplexity)
-        else
-            pop = regEvolCycle(pop, 1.0f0, curmaxsize, frequencyComplexity)
-        end
-
-        if verbosity > 0 && (iT % verbosity == 0)
-            bestPops = bestSubPop(pop)
-            bestCurScoreIdx = argmin([bestPops.members[member].score for member=1:bestPops.n])
-            bestCurScore = bestPops.members[bestCurScoreIdx].score
-            debug(verbosity, bestCurScore, " is the score for ", stringTree(bestPops.members[bestCurScoreIdx].tree))
-        end
-    end
-
-    return pop
-end
julia/Utils.jl
DELETED
@@ -1,34 +0,0 @@
-import Printf: @printf
-
-function id(x::Float32)::Float32
-    x
-end
-
-function debug(verbosity, string...)
-    verbosity > 0 ? println(string...) : nothing
-end
-
-function getTime()::Integer
-    return round(Integer, 1e3*(time()-1.6e9))
-end
-
-# Check for errors before they happen
-function testConfiguration()
-    test_input = LinRange(-100f0, 100f0, 99)
-
-    try
-        for left in test_input
-            for right in test_input
-                for binop in binops
-                    test_output = binop.(left, right)
-                end
-            end
-            for unaop in unaops
-                test_output = unaop.(left)
-            end
-        end
-    catch error
-        @printf("\n\nYour configuration is invalid - one of your operators is not well-defined over the real line.\n\n\n")
-        throw(error)
-    end
-end
julia/halloffame.jl
DELETED
@@ -1,8 +0,0 @@
-# List of the best members seen all time
-mutable struct HallOfFame
-    members::Array{PopMember, 1}
-    exists::Array{Bool, 1} #Whether it has been set
-
-    # Arranged by complexity - store one at each.
-    HallOfFame() = new([PopMember(Node(1f0), 1f9) for i=1:actualMaxsize], [false for i=1:actualMaxsize])
-end
julia/sr.jl
DELETED
@@ -1,218 +0,0 @@
-import Printf: @printf
-
-function fullRun(niterations::Integer;
-        npop::Integer=300,
-        ncyclesperiteration::Integer=3000,
-        fractionReplaced::Float32=0.1f0,
-        verbosity::Integer=0,
-        topn::Integer=10
-       )
-
-    testConfiguration()
-
-    # 1. Start a population on every process
-    allPops = Future[]
-    # Set up a channel to send finished populations back to head node
-    channels = [RemoteChannel(1) for j=1:npopulations]
-    bestSubPops = [Population(1) for j=1:npopulations]
-    hallOfFame = HallOfFame()
-    frequencyComplexity = ones(Float32, actualMaxsize)
-    curmaxsize = 3
-    if warmupMaxsize == 0
-        curmaxsize = maxsize
-    end
-
-    for i=1:npopulations
-        future = @spawnat :any Population(npop, 3)
-        push!(allPops, future)
-    end
-
-    # # 2. Start the cycle on every process:
-    @sync for i=1:npopulations
-        @async allPops[i] = @spawnat :any run(fetch(allPops[i]), ncyclesperiteration, curmaxsize, copy(frequencyComplexity)/sum(frequencyComplexity), verbosity=verbosity)
-    end
-    println("Started!")
-    cycles_complete = npopulations * niterations
-    if warmupMaxsize != 0
-        curmaxsize += 1
-        if curmaxsize > maxsize
-            curmaxsize = maxsize
-        end
-    end
-
-    last_print_time = time()
-    num_equations = 0.0
-    print_every_n_seconds = 5
-    equation_speed = Float32[]
-
-    for i=1:npopulations
-        # Start listening for each population to finish:
-        @async put!(channels[i], fetch(allPops[i]))
-    end
-
-    while cycles_complete > 0
-        @inbounds for i=1:npopulations
-            # Non-blocking check if a population is ready:
-            if isready(channels[i])
-                # Take the fetch operation from the channel since its ready
-                cur_pop = take!(channels[i])
-                bestSubPops[i] = bestSubPop(cur_pop, topn=topn)
-
-                #Try normal copy...
-                bestPops = Population([member for pop in bestSubPops for member in pop.members])
-
-                for member in cur_pop.members
-                    size = countNodes(member.tree)
-                    frequencyComplexity[size] += 1
-                    if member.score < hallOfFame.members[size].score
-                        hallOfFame.members[size] = deepcopy(member)
-                        hallOfFame.exists[size] = true
-                    end
-                end
-
-                # Dominating pareto curve - must be better than all simpler equations
-                dominating = PopMember[]
-                open(hofFile, "w") do io
-                    println(io,"Complexity|MSE|Equation")
-                    for size=1:actualMaxsize
-                        if hallOfFame.exists[size]
-                            member = hallOfFame.members[size]
-                            if weighted
-                                curMSE = MSE(evalTreeArray(member.tree), y, weights)
-                            else
-                                curMSE = MSE(evalTreeArray(member.tree), y)
-                            end
-                            numberSmallerAndBetter = 0
-                            for i=1:(size-1)
-                                if weighted
-                                    hofMSE = MSE(evalTreeArray(hallOfFame.members[i].tree), y, weights)
-                                else
-                                    hofMSE = MSE(evalTreeArray(hallOfFame.members[i].tree), y)
-                                end
-                                if (hallOfFame.exists[size] && curMSE > hofMSE)
-                                    numberSmallerAndBetter += 1
-                                end
-                            end
-                            betterThanAllSmaller = (numberSmallerAndBetter == 0)
-                            if betterThanAllSmaller
-                                println(io, "$size|$(curMSE)|$(stringTree(member.tree))")
-                                push!(dominating, member)
-                            end
-                        end
-                    end
-                end
-                cp(hofFile, hofFile*".bkup", force=true)
-
-                # Try normal copy otherwise.
-                if migration
-                    for k in rand(1:npop, round(Integer, npop*fractionReplaced))
-                        to_copy = rand(1:size(bestPops.members)[1])
-                        cur_pop.members[k] = PopMember(
-                            copyNode(bestPops.members[to_copy].tree),
-                            bestPops.members[to_copy].score)
-                    end
-                end
-
-                if hofMigration && size(dominating)[1] > 0
-                    for k in rand(1:npop, round(Integer, npop*fractionReplacedHof))
-                        # Copy in case one gets used twice
-                        to_copy = rand(1:size(dominating)[1])
-                        cur_pop.members[k] = PopMember(
-                            copyNode(dominating[to_copy].tree)
-                        )
-                    end
-                end
-
-                @async begin
-                    allPops[i] = @spawnat :any let
-                        tmp_pop = run(cur_pop, ncyclesperiteration, curmaxsize, copy(frequencyComplexity)/sum(frequencyComplexity), verbosity=verbosity)
-                        @inbounds @simd for j=1:tmp_pop.n
-                            if rand() < 0.1
-                                tmp_pop.members[j].tree = simplifyTree(tmp_pop.members[j].tree)
-                                tmp_pop.members[j].tree = combineOperators(tmp_pop.members[j].tree)
-                                if shouldOptimizeConstants
-                                    tmp_pop.members[j] = optimizeConstants(tmp_pop.members[j])
-                                end
-                            end
-                        end
-                        tmp_pop = finalizeScores(tmp_pop)
-                        tmp_pop
-                    end
-                    put!(channels[i], fetch(allPops[i]))
-                end
-
-                cycles_complete -= 1
-                cycles_elapsed = npopulations * niterations - cycles_complete
-                if warmupMaxsize != 0 && cycles_elapsed % warmupMaxsize == 0
-                    curmaxsize += 1
-                    if curmaxsize > maxsize
-                        curmaxsize = maxsize
-                    end
-                end
-                num_equations += ncyclesperiteration * npop / 10.0
-            end
-        end
-        sleep(1e-3)
-        elapsed = time() - last_print_time
-        #Update if time has passed, and some new equations generated.
-        if elapsed > print_every_n_seconds && num_equations > 0.0
-            # Dominating pareto curve - must be better than all simpler equations
-            current_speed = num_equations/elapsed
-            average_over_m_measurements = 10 #for print_every...=5, this gives 50 second running average
-            push!(equation_speed, current_speed)
-            if length(equation_speed) > average_over_m_measurements
-                deleteat!(equation_speed, 1)
-            end
-            average_speed = sum(equation_speed)/length(equation_speed)
-            curMSE = baselineMSE
-            lastMSE = curMSE
-            lastComplexity = 0
-            if verbosity > 0
-                @printf("\n")
-                @printf("Cycles per second: %.3e\n", round(average_speed, sigdigits=3))
-                cycles_elapsed = npopulations * niterations - cycles_complete
-                @printf("Progress: %d / %d total iterations (%.3f%%)\n", cycles_elapsed, npopulations * niterations, 100.0*cycles_elapsed/(npopulations*niterations))
-                @printf("Hall of Fame:\n")
-                @printf("-----------------------------------------\n")
-                @printf("%-10s %-8s %-8s %-8s\n", "Complexity", "MSE", "Score", "Equation")
-                @printf("%-10d %-8.3e %-8.3e %-.f\n", 0, curMSE, 0f0, avgy)
-            end
-
-            for size=1:actualMaxsize
-                if hallOfFame.exists[size]
-                    member = hallOfFame.members[size]
-                    if weighted
-                        curMSE = MSE(evalTreeArray(member.tree), y, weights)
-                    else
-                        curMSE = MSE(evalTreeArray(member.tree), y)
-                    end
-                    numberSmallerAndBetter = 0
-                    for i=1:(size-1)
-                        if weighted
-                            hofMSE = MSE(evalTreeArray(hallOfFame.members[i].tree), y, weights)
-                        else
-                            hofMSE = MSE(evalTreeArray(hallOfFame.members[i].tree), y)
-                        end
-                        if (hallOfFame.exists[size] && curMSE > hofMSE)
-                            numberSmallerAndBetter += 1
-                        end
-                    end
-                    betterThanAllSmaller = (numberSmallerAndBetter == 0)
-                    if betterThanAllSmaller
-                        delta_c = size - lastComplexity
-                        delta_l_mse = log(curMSE/lastMSE)
-                        score = convert(Float32, -delta_l_mse/delta_c)
-                        if verbosity > 0
-                            @printf("%-10d %-8.3e %-8.3e %-s\n", size, curMSE, score, stringTree(member.tree))
-                        end
-                        lastMSE = curMSE
-                        lastComplexity = size
-                    end
-                end
-            end
-            debug(verbosity, "")
-            last_print_time = time()
-            num_equations = 0.0
-        end
-    end
-end
julia/truth.jl
DELETED
@@ -1,77 +0,0 @@
|
|
-# *** Custom Functions
-##################################################################################################################################
-# *** Will somewhere need to define a list TRUTHS of all valid auxliary truths
-struct Transformation
-    type::Integer # 1 is symmetry, 2 is zero, 3 is equality
-    params::Array{Int32}
-    Transformation(type::Integer, params::Array{Int32}) = new(type, params)
-    Transformation(type::Integer, params::Array{Int64}) = new(type, params)
-
-end
-struct Truth
-    transformation::Transformation
-    weights::Array{Float32}
-    Truth(transformation::Transformation, weights::Array{Float32}) = new(transformation, weights)
-    Truth(type::Int64, params::Array{Int64}, weights::Array{Float32}) = new(Transformation(type, params), weights)
-    Truth(transformation::Transformation, weights::Array{Float64}) = new(transformation, weights)
-    Truth(type::Int64, params::Array{Int64}, weights::Array{Float64}) = new(Transformation(type, params), weights)
-end
-# Returns a linear combination when given X of shape nxd, y of shape nx1 is f(x) and w of shape d+2x1, result is shape nx1
-function LinearPrediction(cX::Array{Float32}, cy::Array{Float32}, w::Array{Float32})::Array{Float32}
-    preds = 0
-    for i in 1:ndims(cX)
-        preds = preds .+ cX[:,i].*w[i]
-    end
-    preds = preds .+ cy.*w[ndims(cX)+1]
-    return preds .+ w[ndims(cX)+2]
-end
-
-# Returns a copy of the data with the two specified columns swapped
-function swapColumns(cX::Array{Float32, 2}, a::Integer, b::Integer)::Array{Float32, 2}
-    X1 = copy(cX)
-    X1[:, a] = cX[:, b]
-    X1[:, b] = cX[:, a]
-    return X1
-end
-
-# Returns a copy of the data with the specified integers in the list set to value given
-function setVal(cX::Array{Float32, 2}, a::Array{Int32, 1}, val::Float32)::Array{Float32, 2}
-    X1 = copy(cX)
-    for i in 1:size(a)[1]
-        X1[:, a[i]] = fill!(cX[:, a[i]], val)
-    end
-    return X1
-end
-
-# Returns a copy of the data with the specified integer indices in the list set to the first item of that list
-function setEq(cX::Array{Float32, 2}, a::Array{Int32, 1})::Array{Float32, 2}
-    X1 = copy(cX)
-    val = X1[:, a[1]]
-    for i in 1:size(a)[1]
-        X1[:, a[i]] = val
-    end
-    return X1
-end
-
-# Takes in a dataset and returns the transformed version of it as per the specified type and parameters
-function transform(cX::Array{Float32, 2}, transformation::Transformation)::Array{Float32, 2}
-    if transformation.type==1 # then symmetry
-        a = transformation.params[1]
-        b = transformation.params[2]
-        return swapColumns(cX, a, b)
-    elseif transformation.type==2 # then zero condition
-        return setVal(cX, transformation.params, Float32(0))
-    elseif transformation.type == 3 # then equality condition
-        return setEq(cX, transformation.params)
-    else # Then error return X
-        return cX
-    end
-end
-function transform(cX::Array{Float32, 2}, truth::Truth)::Array{Float32, 2}
-    return transform(cX, truth.transformation)
-end
-
-# Takes in X that has been transformed and returns what the Truth projects the target values should be
-function truthPrediction(X_transformed::Array{Float32, 2}, cy::Array{Float32}, truth::Truth)::Array{Float32}
-    return LinearPrediction(X_transformed, cy, truth.weights)
-end
|
julia/truthPops.jl
DELETED
@@ -1,170 +0,0 @@
-# Returns the MSE between the predictions and the truth provided targets for the given dataset
-function truthScore(member::PopMember, cX::Array{Float32, 2}, cy::Array{Float32}, truth::Truth)::Float32
-    transformed = transform(cX, truth)
-    targets = truthPrediction(transformed, cy, truth)
-    preds = evalTreeArray(member.tree, transformed)
-    return MSE(preds, targets)
-end
-
-# Assumes a dataset X, y for a given truth
-function truthScore(member::PopMember, truth::Truth)::Float32
-    return truthScore(member, X, y, truth)
-end
-
-# Assumes a list of Truths TRUTHS is defined. Performs the truthScore function for each of them and returns the average
-function truthScore(member::PopMember, cX::Array{Float32, 2}, cy::Array{Float32})::Float32
-    s = 0
-    for truth in TRUTHS
-        s += (truthScore(member, cX, cy, truth))/size(TRUTHS)[1]
-    end
-    return s
-end
-
-# Assumes list of Truths TRUTHS and dataset X, y are defined
-function truthScore(member::PopMember)::Float32
-    return truthScore(member, X, y)
-end
-# Returns the MSE between the predictions and the truth provided targets for the given dataset
-function truthScore(tree::Node, cX::Array{Float32, 2}, cy::Array{Float32}, truth::Truth)::Float32
-    transformed = transform(cX, truth)
-    targets = truthPrediction(transformed, cy, truth)
-    preds = evalTreeArray(tree, transformed)
-    return MSE(preds, targets)
-end
-
-# Assumes a dataset X, y for a given truth
-function truthScore(tree::Node, truth::Truth)::Float32
-    return truthScore(tree, X, y, truth)
-end
-
-# Assumes a list of Truths TRUTHS is defined. Performs the truthScore function for each of them and returns the average
-function truthScore(tree::Node, cX::Array{Float32, 2}, cy::Array{Float32})::Float32
-    s = 0
-    for truth in TRUTHS
-        s += (truthScore(tree, cX, cy, truth))/size(TRUTHS)[1]
-    end
-    return s
-end
-
-# Assumes list of Truths TRUTHS and dataset X, y are defined
-function truthScore(tree::Node)::Float32
-    return truthScore(tree, X, y)
-end
-
-# Returns true iff Truth Score is below a given threshold i.e truth is satisfied
-function testTruth(member::PopMember, truth::Truth, threshold::Float32=Float32(1.0e-8))::Bool
-    truthError = truthScore(member, truth)
-    #print(stringTree(member.tree), "\n")
-    #print(truth, ": ")
-    #print(truthError, "\n")
-    if truthError > threshold
-        #print("Returns False \n ----\n")
-        return false
-    else
-        #print("Returns True \n ----\n")
-        return true
-    end
-end
-
-# Returns a list of violating functions from assumed list TRUTHS
-function violatingTruths(member::PopMember)::Array{Truth}
-    return violatingTruths(member.tree)
-end
-
-# Returns true iff Truth Score is below a given threshold i.e truth is satisfied
-function testTruth(tree::Node, truth::Truth, threshold::Float32=Float32(1.0e-3))::Bool
-    truthError = truthScore(tree, truth)
-    if truthError > threshold
-        return false
-    else
-        return true
-    end
-end
-
-# Returns a list of violating functions from assumed list TRUTHS
-function violatingTruths(tree::Node)::Array{Truth}
-    toReturn = []
-    #print("\n Checking Equation ", stringTree(tree), "\n")
-    for truth in TRUTHS
-        test_truth = testTruth(tree, truth)
-        #print("Truth: ", truth, ": " , test_truth, "\n-----\n")
-        if !test_truth
-            append!(toReturn, [truth])
-        end
-    end
-    return toReturn
-end
-
-function randomIndex(cX::Array{Float32, 2}, k::Integer=10)::Array{Int32, 1}
-    indxs = sample([Int32(i) for i in 1:size(cX)[1]], k)
-    return indxs
-end
-
-function randomIndex(leng::Integer, k::Integer=10)::Array{Int32, 1}
-    indxs = sample([Int32(i) for i in 1:leng], k)
-    return indxs
-end
-
-function extendedX(cX::Array{Float32, 2}, truth::Truth, indx::Array{Int32, 1})::Array{Float32, 2}
-    workingcX = copy(cX)
-    X_slice = workingcX[indx, :]
-    X_transformed = transform(X_slice, truth)
-    return X_transformed
-end
-function extendedX(truth::Truth, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
-    return extendedX(OGX, truth, indx)
-end
-function extendedX(cX::Array{Float32, 2}, violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
-    if length(violatedTruths) == 0
-        return nothing
-    end
-    workingX = extendedX(cX, violatedTruths[1], indx)
-    for truth in violatedTruths[2:length(violatedTruths)]
-        workingX = vcat(workingX, extendedX(cX, truth, indx))
-    end
-    return workingX
-end
-function extendedX(violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
-    return extendedX(OGX, violatedTruths, indx)
-end
-function extendedX(tree::Node, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
-    violatedTruths = violatingTruths(tree)
-    return extendedX(violatedTruths, indx)
-end
-function extendedX(member::PopMember, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
-    return extendedX(member.tree, indx)
-end
-
-
-function extendedy(cX::Array{Float32, 2}, cy::Array{Float32}, truth::Truth, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
-    cy = copy(cy)
-    cX = copy(cX)
-    X_slice = cX[indx, :]
-    y_slice = cy[indx]
-    X_transformed = transform(X_slice, truth)
-    y_transformed = truthPrediction(X_transformed, y_slice, truth)
-    return y_transformed
-end
-function extendedy(truth::Truth, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
-    return extendedy(OGX, OGy, truth, indx)
-end
-function extendedy(cX::Array{Float32, 2}, cy::Array{Float32}, violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
-    if length(violatedTruths) == 0
-        return nothing
-    end
-    workingy = extendedy(cX, cy, violatedTruths[1], indx)
-    for truth in violatedTruths[2:length(violatedTruths)]
-        workingy = vcat(workingy, extendedy(cX, cy, truth, indx))
-    end
-    return workingy
-end
-function extendedy(violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
-    return extendedy(OGX,OGy, violatedTruths, indx)
-end
-function extendedy(tree::Node, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
-    violatedTruths = violatingTruths(tree)
-    return extendedy(violatedTruths, indx)
-end
-function extendedy(member::PopMember, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
-    return extendedy(member.tree, indx)
-end
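The two deleted files, `julia/truth.jl` and `julia/truthPops.jl`, implemented an experimental "auxiliary truths" mechanism: a `Truth` pairs a dataset transformation (swap two columns, zero columns out, or force columns equal) with a linear prediction for the transformed targets, and candidate equations were penalised by the MSE of their violations. A rough NumPy sketch of the idea, using illustrative names rather than anything shipped with PySR:

```python
import numpy as np

def swap_columns(X, a, b):
    """Symmetry transformation: return a copy of X with columns a and b exchanged."""
    X1 = X.copy()
    X1[:, [a, b]] = X1[:, [b, a]]
    return X1

def truth_violation(predict, X, y, a=0, b=1):
    """MSE between predictions on the transformed data and the targets the truth
    expects there; for an exact symmetry the expected targets are y itself."""
    X_t = swap_columns(X, a, b)
    return float(np.mean((predict(X_t) - y) ** 2))

# Example: f(x0, x1) = x0 * x1 is symmetric in its first two inputs,
# so swapping them should not change the prediction.
X = np.random.randn(100, 3)
y = X[:, 0] * X[:, 1]
print(truth_violation(lambda Z: Z[:, 0] * Z[:, 1], X, y))  # ~0.0
```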
|
pysr/sr.py
CHANGED
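The diff below drops the old mechanism of copying individual `.jl` source files into the temp directory and instead writes a small runfile that activates a Julia project (the bundled `Project.toml` by default), installs `SymbolicRegression` on first use, and calls `EquationSearch`. Two keyword arguments are added to `pysr`: `julia_project` and `user_input`. A hedged usage sketch, assuming the usual `from pysr import pysr` entry point (dataset and operator choices are only illustrative):

```python
import numpy as np
from pysr import pysr

X = np.random.randn(100, 5)
y = 2.5382 * np.cos(X[:, 3]) + X[:, 0] ** 2

# julia_project and user_input are the new keyword arguments from this diff.
equations = pysr(X, y,
                 niterations=5,
                 binary_operators=["plus", "mult"],
                 unary_operators=["cos"],
                 julia_project=None,   # default: use the Project.toml shipped with PySR
                 user_input=False)     # don't prompt before installing SymbolicRegression.jl
print(equations)
```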
@@ -100,13 +100,13 @@ def pysr(X=None, y=None, weights=None,
         useFrequency=False,
         tempdir=None,
         delete_tempfiles=True,
-        limitPowComplexity=False, #deprecated
-        threads=None, #deprecated
         julia_optimization=3,
+        julia_project=None,
+        user_input=True
 ):
     """Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
     Note: most default parameters have been tuned over several example
-    equations, but you should adjust `
+    equations, but you should adjust `niterations`,
     `binary_operators`, `unary_operators` to your requirements.

     :param X: np.ndarray or pandas.DataFrame, 2D array. Rows are examples,
@@ -188,12 +188,16 @@ def pysr(X=None, y=None, weights=None,
     :param julia_optimization: int, Optimization level (0, 1, 2, 3)
     :param tempdir: str or None, directory for the temporary files
     :param delete_tempfiles: bool, whether to delete the temporary files after finishing
+    :param julia_project: str or None, a Julia environment location containing
+        a Project.toml (and potentially the source code for SymbolicRegression.jl).
+        Default gives the Python package directory, where a Project.toml file
+        should be present from the install.
+    :param user_input: Whether to ask for user input or not for installing (to
+        be used for automated scripts). Will choose to install when asked.
     :returns: pd.DataFrame, Results dataframe, giving complexity, MSE, and equations
         (as strings).

     """
-    _raise_depreciation_errors(limitPowComplexity, threads)
-
     if isinstance(X, pd.DataFrame):
         variable_names = list(X.columns)
         X = np.array(X)
@@ -239,8 +243,7 @@ def pysr(X=None, y=None, weights=None,
                   topn=topn, verbosity=verbosity,
                   julia_optimization=julia_optimization, timeout=timeout,
                   fractionReplacedHof=fractionReplacedHof,
-                  hofMigration=hofMigration,
-                  limitPowComplexity=limitPowComplexity, maxdepth=maxdepth,
+                  hofMigration=hofMigration, maxdepth=maxdepth,
                   maxsize=maxsize, migration=migration, nrestarts=nrestarts,
                   parsimony=parsimony, perturbationFactor=perturbationFactor,
                   populations=populations, procs=procs,
@@ -257,17 +260,24 @@ def pysr(X=None, y=None, weights=None,
                   weightRandomize=weightRandomize,
                   weightSimplify=weightSimplify,
                   constraints=constraints,
-                  extra_sympy_mappings=extra_sympy_mappings
+                  extra_sympy_mappings=extra_sympy_mappings,
+                  julia_project=julia_project)

     kwargs = {**_set_paths(tempdir), **kwargs}

-    kwargs['
+    pkg_directory = kwargs['pkg_directory']
+    kwargs['need_install'] = False
+    if not (pkg_directory / 'Manifest.toml').is_file():
+        kwargs['need_install'] = (not user_input) or _yesno("I will install Julia packages using PySR's Project.toml file. OK?")
+        if kwargs['need_install']:
+            print("OK. I will install at launch.")
+
+    kwargs['def_hyperparams'] = _create_inline_operators(**kwargs)

     _handle_constraints(**kwargs)

     kwargs['constraints_str'] = _make_constraints_str(**kwargs)
     kwargs['def_hyperparams'] = _make_hyperparams_julia_str(**kwargs)
-    kwargs['def_auxiliary'] = _make_auxiliary_julia_str(**kwargs)
     kwargs['def_datasets'] = _make_datasets_julia_str(**kwargs)

     _create_julia_files(**kwargs)
@@ -280,13 +290,6 @@ def pysr(X=None, y=None, weights=None,
     return get_hof(**kwargs)


-def _make_auxiliary_julia_str(julia_auxiliary_filenames, **kwargs):
-    def_auxiliary = '\n'.join([
-        f"""include("{_escape_filename(aux_fname)}")""" for aux_fname in julia_auxiliary_filenames
-    ])
-    return def_auxiliary
-
-
 def _set_globals(X, equation_file, extra_sympy_mappings, variable_names, **kwargs):
     global global_n_features
     global global_equation_file
@@ -298,14 +301,16 @@ def _set_globals(X, equation_file, extra_sympy_mappings, variable_names, **kwarg
     global_extra_sympy_mappings = extra_sympy_mappings


-def _final_pysr_process(julia_optimization,
+def _final_pysr_process(julia_optimization, runfile_filename, timeout, **kwargs):
     command = [
         f'julia', f'-O{julia_optimization:d}',
-        f'-p', f'{procs}',
         str(runfile_filename),
     ]
     if timeout is not None:
         command = [f'timeout', f'{timeout}'] + command
+    _cmd_runner(command)
+
+def _cmd_runner(command):
     print("Running on", ' '.join(command))
     process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1)
     try:
@@ -320,73 +325,108 @@ def _final_pysr_process(julia_optimization, procs, runfile_filename, timeout, **
         print("Killing process... will return when done.")
         process.kill()

-
-
-
+def _create_julia_files(dataset_filename, def_datasets, hyperparam_filename, def_hyperparams,
+                        fractionReplaced, ncyclesperiteration, niterations, npop,
+                        runfile_filename, topn, verbosity, julia_project, procs, weights,
+                        X, variable_names, pkg_directory, need_install, **kwargs):
     with open(hyperparam_filename, 'w') as f:
         print(def_hyperparams, file=f)
     with open(dataset_filename, 'w') as f:
         print(def_datasets, file=f)
-    with open(auxiliary_filename, 'w') as f:
-        print(def_auxiliary, file=f)
     with open(runfile_filename, 'w') as f:
-
-
-
-
-        print(
-
-
-
+        if julia_project is None:
+            julia_project = pkg_directory
+        else:
+            julia_project = Path(julia_project)
+        print(f'import Pkg', file=f)
+        print(f'Pkg.activate("{_escape_filename(julia_project)}")', file=f)
+        if need_install:
+            print(f'Pkg.add("SymbolicRegression")', file=f)
+            print(f'Pkg.instantiate()', file=f)
+            print(f'Pkg.precompile()', file=f)
+        print(f'using SymbolicRegression', file=f)
+        print(f'include("{_escape_filename(hyperparam_filename)}")', file=f)
+        print(f'include("{_escape_filename(dataset_filename)}")', file=f)
+        if len(variable_names) == 0:
+            varMap = "[" + ",".join([f'"x{i}"' for i in range(X.shape[1])]) + "]"
+        else:
+            varMap = "[" + ",".join(variable_names) + "]"
+
+        if weights is not None:
+            print(f'EquationSearch(X, y, weights=weights, niterations={niterations:d}, varMap={varMap}, options=options, numprocs={procs})', file=f)
+        else:
+            print(f'EquationSearch(X, y, niterations={niterations:d}, varMap={varMap}, options=options, numprocs={procs})', file=f)


 def _make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename, **kwargs):
     def_datasets = """using DelimitedFiles"""
     np.savetxt(X_filename, X, delimiter=',')
-    np.savetxt(y_filename, y, delimiter=',')
+    np.savetxt(y_filename, y.reshape(-1, 1), delimiter=',')
     if weights is not None:
-        np.savetxt(weights_filename, weights, delimiter=',')
+        np.savetxt(weights_filename, weights.reshape(-1, 1), delimiter=',')
     def_datasets += f"""
-
-
+X = copy(transpose(readdlm("{_escape_filename(X_filename)}", ',', Float32, '\\n')))
+y = readdlm("{_escape_filename(y_filename)}", ',', Float32, '\\n')[:, 1]"""
     if weights is not None:
         def_datasets += f"""
-
+weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')[:, 1]"""
     return def_datasets

-
 def _make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators, constraints_str,
                                 def_hyperparams, equation_file, fast_cycle, fractionReplacedHof, hofMigration,
-
+                                maxdepth, maxsize, migration, nrestarts, npop,
                                 parsimony, perturbationFactor, populations, procs, shouldOptimizeConstants,
-                                unary_operators, useFrequency, use_custom_variable_names,
+                                unary_operators, useFrequency, use_custom_variable_names,
+                                variable_names, warmupMaxsize, weightAddNode,
+                                ncyclesperiteration, fractionReplaced, topn, verbosity,
                                 weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
                                 weightMutateOperator, weightRandomize, weightSimplify, weights, **kwargs):
-
+    def tuple_fix(ops):
+        if len(ops) > 1:
+            return ', '.join(ops)
+        elif len(ops) == 0:
+            return ''
+        else:
+            return ops[0] + ','
+
+    def_hyperparams += f"""\n
+plus=(+)
+sub=(-)
+mult=(*)
+square=SymbolicRegression.square
+cube=SymbolicRegression.cube
+pow=(^)
+div=(/)
+logm=SymbolicRegression.logm
+logm2=SymbolicRegression.logm2
+logm10=SymbolicRegression.logm10
+sqrtm=SymbolicRegression.sqrtm
+neg=SymbolicRegression.neg
+greater=SymbolicRegression.greater
+relu=SymbolicRegression.relu
+logical_or=SymbolicRegression.logical_or
+logical_and=SymbolicRegression.logical_and
+
+options = SymbolicRegression.Options(binary_operators={'(' + tuple_fix(binary_operators) + ')'},
+                                     unary_operators={'(' + tuple_fix(unary_operators) + ')'},
 {constraints_str}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    const annealing = {"true" if annealing else "false"}
-    const weighted = {"true" if weights is not None else "false"}
-    const batching = {"true" if batching else "false"}
-    const batchSize = {min([batchSize, len(X)]) if batching else len(X):d}
-    const useVarMap = {"true" if use_custom_variable_names else "false"}
-    const mutationWeights = [
+                                     parsimony={parsimony:f}f0,
+                                     alpha={alpha:f}f0,
+                                     maxsize={maxsize:d},
+                                     maxdepth={maxdepth:d},
+                                     fast_cycle={'true' if fast_cycle else 'false'},
+                                     migration={'true' if migration else 'false'},
+                                     hofMigration={'true' if hofMigration else 'false'},
+                                     fractionReplacedHof={fractionReplacedHof}f0,
+                                     shouldOptimizeConstants={'true' if shouldOptimizeConstants else 'false'},
+                                     hofFile="{equation_file}",
+                                     npopulations={populations:d},
+                                     nrestarts={nrestarts:d},
+                                     perturbationFactor={perturbationFactor:f}f0,
+                                     annealing={"true" if annealing else "false"},
+                                     batching={"true" if batching else "false"},
+                                     batchSize={min([batchSize, len(X)]) if batching else len(X):d},
+                                     mutationWeights=[
         {weightMutateConstant:f},
         {weightMutateOperator:f},
         {weightAddNode:f},
@@ -395,53 +435,25 @@ const mutationWeights = [
         {weightSimplify:f},
         {weightRandomize:f},
         {weightDoNothing:f}
-    ]
-
-
-
+                                     ],
+                                     warmupMaxsize={warmupMaxsize:d},
+                                     useFrequency={"true" if useFrequency else "false"},
+                                     npop={npop:d},
+                                     ncyclesperiteration={ncyclesperiteration:d},
+                                     fractionReplaced={fractionReplaced:f}f0,
+                                     topn={topn:d},
+                                     verbosity=round(Int32, {verbosity:f})
 """
-
-    if len(binary_operators) > 0:
-        op_runner += """
-@inline function BINOP!(x::Array{Float32, 1}, y::Array{Float32, 1}, i::Int, clen::Int)
-    if i === 1
-        @inbounds @simd for j=1:clen
-            x[j] = """f"{binary_operators[0]}""""(x[j], y[j])
-        end"""
-        for i in range(1, len(binary_operators)):
-            op_runner += f"""
-    elseif i === {i + 1}
-        @inbounds @simd for j=1:clen
-            x[j] = {binary_operators[i]}(x[j], y[j])
-        end"""
-        op_runner += """
-    end
-end"""
-    if len(unary_operators) > 0:
-        op_runner += """
-@inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
-    if i === 1
-        @inbounds @simd for j=1:clen
-            x[j] = """f"{unary_operators[0]}(x[j])""""
-        end"""
-        for i in range(1, len(unary_operators)):
-            op_runner += f"""
-    elseif i === {i + 1}
-        @inbounds @simd for j=1:clen
-            x[j] = {unary_operators[i]}(x[j])
-        end"""
-        op_runner += """
-    end
-end"""
-    def_hyperparams += op_runner
+
     if use_custom_variable_names:
-        def_hyperparams += f"""
-
+        def_hyperparams += f""",
+                                     varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
+    def_hyperparams += '\n)'
     return def_hyperparams


 def _make_constraints_str(binary_operators, constraints, unary_operators, **kwargs):
-    constraints_str = "
+    constraints_str = "una_constraints = ["
     first = True
     for op in unary_operators:
         val = constraints[op]
@@ -449,8 +461,8 @@ def _make_constraints_str(binary_operators, constraints, unary_operators, **kwar
             constraints_str += ", "
         constraints_str += f"{val:d}"
         first = False
-    constraints_str += """]
-
+    constraints_str += """],
+bin_constraints = ["""
     first = True
     for op in binary_operators:
         tup = constraints[op]
@@ -458,7 +470,7 @@ const bin_constraints = ["""
             constraints_str += ", "
         constraints_str += f"({tup[0]:d}, {tup[1]:d})"
         first = False
-    constraints_str += "]"
+    constraints_str += "],"
     return constraints_str


@@ -481,7 +493,7 @@ def _handle_constraints(binary_operators, constraints, unary_operators, **kwargs
             constraints[op][0], constraints[op][1] = constraints[op][1], constraints[op][0]


-def
+def _create_inline_operators(binary_operators, unary_operators, **kwargs):
     def_hyperparams = ""
     for op_list in [binary_operators, unary_operators]:
         for i in range(len(op_list)):
@@ -529,35 +541,20 @@ def _handle_feature_selection(X, select_k_features, use_custom_variable_names, v

 def _set_paths(tempdir):
     # System-independent paths
-    pkg_directory = Path(__file__).parents[1]
-
-    operator_filename = pkg_directory / "Operators.jl"
-    julia_auxiliaries = [
-        "Equation.jl", "ProgramConstants.jl",
-        "LossFunctions.jl", "Utils.jl", "EvaluateEquation.jl",
-        "MutationFunctions.jl", "SimplifyEquation.jl", "PopMember.jl",
-        "HallOfFame.jl", "CheckConstraints.jl", "Mutate.jl",
-        "Population.jl", "RegularizedEvolution.jl", "SingleIteration.jl",
-        "ConstantOptimization.jl"
-    ]
-    julia_auxiliary_filenames = [
-        pkg_directory / fname
-        for fname in julia_auxiliaries
-    ]
-
+    pkg_directory = Path(__file__).parents[1]
+    default_project_file = pkg_directory / "Project.toml"
     tmpdir = Path(tempfile.mkdtemp(dir=tempdir))
     hyperparam_filename = tmpdir / f'hyperparams.jl'
     dataset_filename = tmpdir / f'dataset.jl'
-    auxiliary_filename = tmpdir / f'auxiliary.jl'
     runfile_filename = tmpdir / f'runfile.jl'
     X_filename = tmpdir / "X.csv"
     y_filename = tmpdir / "y.csv"
     weights_filename = tmpdir / "weights.csv"
-    return dict(
+    return dict(pkg_directory=pkg_directory,
+                default_project_file=default_project_file,
+                X_filename=X_filename,
                 dataset_filename=dataset_filename,
                 hyperparam_filename=hyperparam_filename,
-                julia_auxiliary_filenames=julia_auxiliary_filenames,
-                operator_filename=operator_filename, pkg_filename=pkg_filename,
                 runfile_filename=runfile_filename, tmpdir=tmpdir,
                 weights_filename=weights_filename, y_filename=y_filename)

@@ -575,13 +572,6 @@ def _check_assertions(X, binary_operators, unary_operators, use_custom_variable_
     assert len(variable_names) == X.shape[1]


-def _raise_depreciation_errors(limitPowComplexity, threads):
-    if threads is not None:
-        raise ValueError("The threads kwarg is deprecated. Use procs.")
-    if limitPowComplexity:
-        raise ValueError("The limitPowComplexity kwarg is deprecated. Use constraints.")
-
-
 def run_feature_selection(X, y, select_k_features):
     """Use a gradient boosting tree regressor as a proxy for finding
     the k most important features in X, returning indices for those
@@ -695,3 +685,15 @@ def _escape_filename(filename):
     repr = str(filename)
     repr = repr.replace('\\', '\\\\')
     return repr
+
+# https://gist.github.com/garrettdreyfus/8153571
+def _yesno(question):
+    """Simple Yes/No Function."""
+    prompt = f'{question} (y/n): '
+    ans = input(prompt).strip().lower()
+    if ans not in ['y', 'n']:
+        print(f'{ans} is invalid, please try again...')
+        return _yesno(question)
+    if ans == 'y':
+        return True
+    return False
|
setup.py
CHANGED
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:

 setuptools.setup(
     name="pysr", # Replace with your own username
-    version="0.
+    version="0.4.0",
     author="Miles Cranmer",
     author_email="[email protected]",
     description="Simple and efficient symbolic regression",
@@ -19,7 +19,7 @@ setuptools.setup(
     ],
     packages=setuptools.find_packages(),
     package_data={
-        'pysr': ['../
+        'pysr': ['../Project.toml']
     },
     include_package_data=False,
     classifiers=[
|
test/test.py
CHANGED
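The updated test below defines a custom unary operator inline as a Julia snippet and relaxes the MSE thresholds to 1e-4. A sketch of the same pattern in user code (operator and mapping names are illustrative; the `extra_sympy_mappings` key pairs with the operator's name so the returned equations can be parsed on the Python side):

```python
import numpy as np
from pysr import pysr

X = np.random.randn(100, 5)
y = X[:, 0] ** 2

# "sq(x) = x^2" is compiled by the Julia backend as a new unary operator.
equations = pysr(X, y,
                 unary_operators=["sq(x) = x^2"], binary_operators=["plus"],
                 extra_sympy_mappings={'sq': lambda x: x**2},
                 niterations=10)
assert equations.iloc[-1]['MSE'] < 1e-4
```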
@@ -7,16 +7,16 @@ y = X[:, 0]
 equations = pysr(X, y,
                  niterations=10)
 print(equations)
-assert equations.iloc[-1]['MSE'] < 1e-
+assert equations.iloc[-1]['MSE'] < 1e-4

 print("Test 2 - test custom operator")
 y = X[:, 0]**2
 equations = pysr(X, y,
-                 unary_operators=["
+                 unary_operators=["sq(x) = x^2"], binary_operators=["plus"],
                  extra_sympy_mappings={'square': lambda x: x**2},
                  niterations=10)
 print(equations)
-assert equations.iloc[-1]['MSE'] < 1e-
+assert equations.iloc[-1]['MSE'] < 1e-4

 X = np.random.randn(100, 1)
 y = X[:, 0] + 3.0
@@ -26,4 +26,4 @@ equations = pysr(X, y,
                  niterations=10)

 print(equations)
-assert equations.iloc[-1]['MSE'] < 1e-
+assert equations.iloc[-1]['MSE'] < 1e-4