Spaces:

MilesCranmer
/

PySR

Running

App Files Files Community

Dhananjay Ashok committed on Jan 18, 2021

Commit

d7fdb9c

unverified ·

2 Parent(s): 371f4fe e30b74d

Merge branch 'recover' into refactoring

Browse files

Files changed (21) hide show

.gitignore +1 -2
README.md +2 -7
main.py → example.py +6 -8
julia/{complexityChecks.jl → CheckConstraints.jl} +1 -1
julia/{optimization.jl → ConstantOptimization.jl} +3 -1
julia/{Node.jl → Equation.jl} +0 -0
julia/{eval.jl → EvaluateEquation.jl} +0 -35
julia/{errors.jl → LossFunctions.jl} +46 -1
julia/{simulatedAnnealing.jl → Mutate.jl} +2 -3
julia/{randomMutations.jl → MutationFunctions.jl} +0 -0
julia/{operators.jl → Operators.jl} +0 -0
julia/{constants.jl → ProgramConstants.jl} +0 -0
julia/{regEvolCycle.jl → RegularizedEvolution.jl} +3 -1
julia/{simplification.jl → SimplifyEquation.jl} +0 -0
julia/{run.jl → SingleIteration.jl} +0 -0
julia/{utils.jl → Utils.jl} +3 -1
julia/halloffame.jl +1 -1
julia/sr.jl +0 -45
julia/truth.jl +77 -0
julia/truthPops.jl +170 -0
pysr/sr.py +2 -0

.gitignore CHANGED Viewed

@@ -8,8 +8,7 @@ trials*
 **/__pycache__
 build
 dist
-*.idea/
 *.vs/*
 *.pyproj
 *.sln
-pysr/.vs/

 **/__pycache__
 build
 dist
 *.vs/*
 *.pyproj
 *.sln
+pysr/.vs/

README.md CHANGED Viewed

@@ -65,16 +65,11 @@ pip install pysr
 ```
 # Quickstart
-For a simple demo, start by navigating to the root directory and then in a terminal call
-```
->> python main.py
-```
-<br>
-For a more hands on demo you can do the following (also in the root directory)
 ```python
 import numpy as np
-from pysr import pysr, best, get_hof
 # Dataset
 X = 2*np.random.randn(100, 5)

 ```
 # Quickstart
+Here is some demo code (also found in `example.py`)
 ```python
 import numpy as np
+from pysr import pysr, best
 # Dataset
 X = 2*np.random.randn(100, 5)

main.py → example.py RENAMED Viewed

@@ -1,19 +1,17 @@
 import numpy as np
-from pysr import pysr, best, get_hof
-import time
 # Dataset
 X = 2*np.random.randn(100, 5)
 y = 2*np.cos(X[:, 3]) + X[:, 0]**2 - 2
 # Learn equations
-start = time.time()
 equations = pysr(X, y, niterations=5,
-                binary_operators=["plus", "mult"],
-                unary_operators=["cos", "exp", "sin"])
-...  # (you can use ctl-c to exit early)
 print(best(equations))
-print(f"Took {time.time()-start} seconds")

 import numpy as np
+from pysr import pysr, best
 # Dataset
 X = 2*np.random.randn(100, 5)
 y = 2*np.cos(X[:, 3]) + X[:, 0]**2 - 2
 # Learn equations
 equations = pysr(X, y, niterations=5,
+    binary_operators=["plus", "mult"],
+    unary_operators=[
+      "cos", "exp", "sin", #Pre-defined library of operators (see https://pysr.readthedocs.io/en/latest/docs/operators/)
+      "inv(x) = 1/x"]) # Define your own operator! (Julia syntax)
+...# (you can use ctl-c to exit early)
 print(best(equations))

julia/{complexityChecks.jl → CheckConstraints.jl} RENAMED Viewed

@@ -39,4 +39,4 @@ function flagUnaOperatorComplexity(tree::Node, op::Int)::Bool
     else
         return (flagUnaOperatorComplexity(tree.l, op) || flagUnaOperatorComplexity(tree.r, op))
     end
-end

     else
         return (flagUnaOperatorComplexity(tree.l, op) || flagUnaOperatorComplexity(tree.r, op))
     end
+end

julia/{optimization.jl → ConstantOptimization.jl} RENAMED Viewed

@@ -1,3 +1,5 @@
 # Proxy function for optimization
 function optFunc(x::Array{Float32, 1}, tree::Node)::Float32
     setConstants(tree, x)
@@ -44,4 +46,4 @@ function optimizeConstants(member::PopMember)::PopMember
         end
     end
     return member
-end

+import Optim
 # Proxy function for optimization
 function optFunc(x::Array{Float32, 1}, tree::Node)::Float32
     setConstants(tree, x)
         end
     end
     return member
+end

julia/{Node.jl → Equation.jl} RENAMED Viewed

File without changes

julia/{eval.jl → EvaluateEquation.jl} RENAMED Viewed

@@ -45,38 +45,3 @@ function evalTreeArray(tree::Node, cX::Array{Float32, 2})::Union{Array{Float32,
         return cumulator
     end
 end
-# Score an equation
-function scoreFunc(tree::Node)::Float32
-    prediction = evalTreeArray(tree)
-    if prediction === nothing
-        return 1f9
-    end
-    if weighted
-        mse = MSE(prediction, y, weights)
-    else
-        mse = MSE(prediction, y)
-    end
-    return mse / baselineMSE + countNodes(tree)*parsimony
-end
-# Score an equation with a small batch
-function scoreFuncBatch(tree::Node)::Float32
-    # batchSize
-    batch_idx = randperm(len)[1:batchSize]
-    batch_X = X[batch_idx, :]
-    prediction = evalTreeArray(tree, batch_X)
-    if prediction === nothing
-        return 1f9
-    end
-    size_adjustment = 1f0
-    batch_y = y[batch_idx]
-    if weighted
-        batch_w = weights[batch_idx]
-        mse = MSE(prediction, batch_y, batch_w)
-        size_adjustment = 1f0 * len / batchSize
-    else
-        mse = MSE(prediction, batch_y)
-    end
-    return size_adjustment * mse / baselineMSE + countNodes(tree)*parsimony
-end

         return cumulator
     end
 end

julia/{errors.jl → LossFunctions.jl} RENAMED Viewed

@@ -1,3 +1,5 @@
 # Sum of square error between two arrays
 function SSE(x::Array{Float32}, y::Array{Float32})::Float32
     diff = (x - y)
@@ -34,4 +36,47 @@ end
 # Mean of square error between two arrays
 function MSE(x::Array{Float32}, y::Array{Float32}, w::Array{Float32})::Float32
     return SSE(x, y, w)/sum(w)
-end

+import Random: randperm
 # Sum of square error between two arrays
 function SSE(x::Array{Float32}, y::Array{Float32})::Float32
     diff = (x - y)
 # Mean of square error between two arrays
 function MSE(x::Array{Float32}, y::Array{Float32}, w::Array{Float32})::Float32
     return SSE(x, y, w)/sum(w)
+end
+if weighted
+    const avgy = sum(y .* weights)/sum(weights)
+    const baselineMSE = MSE(y, convert(Array{Float32, 1}, ones(len) .* avgy), weights)
+else
+    const avgy = sum(y)/len
+    const baselineMSE = MSE(y, convert(Array{Float32, 1}, ones(len) .* avgy))
+end
+# Score an equation
+function scoreFunc(tree::Node)::Float32
+    prediction = evalTreeArray(tree)
+    if prediction === nothing
+        return 1f9
+    end
+    if weighted
+        mse = MSE(prediction, y, weights)
+    else
+        mse = MSE(prediction, y)
+    end
+    return mse / baselineMSE + countNodes(tree)*parsimony
+end
+# Score an equation with a small batch
+function scoreFuncBatch(tree::Node)::Float32
+    # batchSize
+    batch_idx = randperm(len)[1:batchSize]
+    batch_X = X[batch_idx, :]
+    prediction = evalTreeArray(tree, batch_X)
+    if prediction === nothing
+        return 1f9
+    end
+    size_adjustment = 1f0
+    batch_y = y[batch_idx]
+    if weighted
+        batch_w = weights[batch_idx]
+        mse = MSE(prediction, batch_y, batch_w)
+        size_adjustment = 1f0 * len / batchSize
+    else
+        mse = MSE(prediction, batch_y)
+    end
+    return size_adjustment * mse / baselineMSE + countNodes(tree)*parsimony
+end

julia/{simulatedAnnealing.jl → Mutate.jl} RENAMED Viewed

@@ -1,5 +1,4 @@
-# Go through one simulated annealing mutation cycle
-#  exp(-delta/T) defines probability of accepting a change
 function iterate(member::PopMember, T::Float32, curmaxsize::Integer, frequencyComplexity::Array{Float32, 1})::PopMember
     prev = member.tree
     tree = prev
@@ -122,4 +121,4 @@ function iterate(member::PopMember, T::Float32, curmaxsize::Integer, frequencyCo
         end
     end
     return PopMember(tree, afterLoss)
-end

+# Go through one mutation cycle
 function iterate(member::PopMember, T::Float32, curmaxsize::Integer, frequencyComplexity::Array{Float32, 1})::PopMember
     prev = member.tree
     tree = prev
         end
     end
     return PopMember(tree, afterLoss)
+end

julia/{randomMutations.jl → MutationFunctions.jl} RENAMED Viewed

File without changes

julia/{operators.jl → Operators.jl} RENAMED Viewed

File without changes

julia/{constants.jl → ProgramConstants.jl} RENAMED Viewed

File without changes

julia/{regEvolCycle.jl → RegularizedEvolution.jl} RENAMED Viewed

@@ -1,3 +1,5 @@
 # Pass through the population several times, replacing the oldest
 # with the fittest of a small subsample
 function regEvolCycle(pop::Population, T::Float32, curmaxsize::Integer,
@@ -41,4 +43,4 @@ function regEvolCycle(pop::Population, T::Float32, curmaxsize::Integer,
     end
     return pop
-end

+import Random: shuffle!
 # Pass through the population several times, replacing the oldest
 # with the fittest of a small subsample
 function regEvolCycle(pop::Population, T::Float32, curmaxsize::Integer,
     end
     return pop
+end

julia/{simplification.jl → SimplifyEquation.jl} RENAMED Viewed

File without changes

julia/{run.jl → SingleIteration.jl} RENAMED Viewed

File without changes

julia/{utils.jl → Utils.jl} RENAMED Viewed

@@ -1,3 +1,5 @@
 function id(x::Float32)::Float32
     x
 end
@@ -29,4 +31,4 @@ function testConfiguration()
         @printf("\n\nYour configuration is invalid - one of your operators is not well-defined over the real line.\n\n\n")
         throw(error)
     end
-end

+import Printf: @printf
 function id(x::Float32)::Float32
     x
 end
         @printf("\n\nYour configuration is invalid - one of your operators is not well-defined over the real line.\n\n\n")
         throw(error)
     end
+end

julia/halloffame.jl CHANGED Viewed

@@ -5,4 +5,4 @@ mutable struct HallOfFame
     # Arranged by complexity - store one at each.
     HallOfFame() = new([PopMember(Node(1f0), 1f9) for i=1:actualMaxsize], [false for i=1:actualMaxsize])
-end

     # Arranged by complexity - store one at each.
     HallOfFame() = new([PopMember(Node(1f0), 1f9) for i=1:actualMaxsize], [false for i=1:actualMaxsize])
+end

julia/sr.jl CHANGED Viewed

@@ -1,49 +1,4 @@
-import Optim
 import Printf: @printf
-import Random: shuffle!, randperm
-include("constants.jl")
-include("errors.jl")
-if weighted
-    const avgy = sum(y .* weights)/sum(weights)
-    const baselineMSE = MSE(y, convert(Array{Float32, 1}, ones(len) .* avgy), weights)
-else
-    const avgy = sum(y)/len
-    const baselineMSE = MSE(y, convert(Array{Float32, 1}, ones(len) .* avgy))
-end
-include("utils.jl")
-include("Node.jl")
-include("eval.jl")
-include("randomMutations.jl")
-include("simplification.jl")
-include("PopMember.jl")
-include("halloffame.jl")
-include("complexityChecks.jl")
-include("simulatedAnnealing.jl")
-include("Population.jl")
-include("regEvolCycle.jl")
-include("run.jl")
-include("optimization.jl")
 function fullRun(niterations::Integer;
                 npop::Integer=300,

 import Printf: @printf
 function fullRun(niterations::Integer;
                 npop::Integer=300,

julia/truth.jl ADDED Viewed

	@@ -0,0 +1,77 @@

+# *** Custom Functions
+##################################################################################################################################
+# *** Will somewhere need to define a list TRUTHS of all valid auxiliary truths
+struct Transformation
+    type::Integer # 1 is symmetry, 2 is zero, 3 is equality
+    params::Array{Int32}
+    Transformation(type::Integer, params::Array{Int32}) = new(type, params)
+    Transformation(type::Integer, params::Array{Int64}) = new(type, params)
+end
+struct Truth
+    transformation::Transformation
+    weights::Array{Float32}
+    Truth(transformation::Transformation, weights::Array{Float32}) = new(transformation, weights)
+    Truth(type::Int64, params::Array{Int64}, weights::Array{Float32}) = new(Transformation(type, params), weights)
+    Truth(transformation::Transformation, weights::Array{Float64}) = new(transformation, weights)
+    Truth(type::Int64, params::Array{Int64}, weights::Array{Float64}) = new(Transformation(type, params), weights)
+end
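+# Returns the linear combination sum_i X[:, i]*w[i] + y*w[d+1] + w[d+2], where X has shape n×d, y = f(X) has shape n×1 and w has shape (d+2)×1; the result has shape n×1. NOTE(review): the code below uses ndims(cX) (always 2 for a matrix) in place of d, so this only holds as written when d == 2 - confirm intent.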
+function LinearPrediction(cX::Array{Float32}, cy::Array{Float32}, w::Array{Float32})::Array{Float32}
+     preds = 0
+     for i in 1:ndims(cX)
+       preds = preds .+ cX[:,i].*w[i]
+       end
+     preds = preds .+ cy.*w[ndims(cX)+1]
+     return preds .+ w[ndims(cX)+2]
+end
+# Returns a copy of the data with the two specified columns swapped
+function swapColumns(cX::Array{Float32, 2}, a::Integer, b::Integer)::Array{Float32, 2}
+    X1 = copy(cX)
+    X1[:, a] = cX[:, b]
+    X1[:, b] = cX[:, a]
+    return X1
+end
+# Returns a copy of the data in which every column whose index is listed in `a` is filled with the given value `val`
+function setVal(cX::Array{Float32, 2}, a::Array{Int32, 1}, val::Float32)::Array{Float32, 2}
+    X1 = copy(cX)
+    for i in 1:size(a)[1]
+        X1[:, a[i]] = fill!(cX[:, a[i]], val)
+    end
+    return X1
+end
+# Returns a copy of the data in which every column whose index is listed in `a` is overwritten with the column at the first listed index, a[1]
+function setEq(cX::Array{Float32, 2}, a::Array{Int32, 1})::Array{Float32, 2}
+    X1 = copy(cX)
+    val = X1[:, a[1]]
+    for i in 1:size(a)[1]
+        X1[:, a[i]] = val
+    end
+    return X1
+end
+# Takes in a dataset and returns the transformed version of it as per the specified type and parameters
+function transform(cX::Array{Float32, 2}, transformation::Transformation)::Array{Float32, 2}
+    if transformation.type==1 # then symmetry
+        a = transformation.params[1]
+        b = transformation.params[2]
+        return swapColumns(cX, a, b)
+    elseif transformation.type==2 # then zero condition
+        return setVal(cX, transformation.params, Float32(0))
+    elseif transformation.type == 3 # then equality condition
+        return setEq(cX, transformation.params)
+    else # Then error return X
+        return cX
+    end
+end
+function transform(cX::Array{Float32, 2}, truth::Truth)::Array{Float32, 2}
+    return transform(cX, truth.transformation)
+end
+# Takes in X that has been transformed and returns what the Truth projects the target values should be
+function truthPrediction(X_transformed::Array{Float32, 2}, cy::Array{Float32}, truth::Truth)::Array{Float32}
+    return LinearPrediction(X_transformed, cy, truth.weights)
+end

julia/truthPops.jl ADDED Viewed

	@@ -0,0 +1,170 @@

+# Returns the MSE between the member's predictions and the truth-provided targets for the given dataset
+function truthScore(member::PopMember, cX::Array{Float32, 2}, cy::Array{Float32}, truth::Truth)::Float32
+    transformed = transform(cX, truth)
+    targets = truthPrediction(transformed, cy, truth)
+    preds = evalTreeArray(member.tree, transformed)
+    return MSE(preds, targets)
+end
+# Assumes a dataset X, y for a given truth
+function truthScore(member::PopMember, truth::Truth)::Float32
+    return truthScore(member, X, y, truth)
+end
+# Assumes a list of Truths TRUTHS is defined. Performs the truthScore function for each of them and returns the average
+function truthScore(member::PopMember, cX::Array{Float32, 2}, cy::Array{Float32})::Float32
+    s = 0
+    for truth in TRUTHS
+        s += (truthScore(member, cX, cy, truth))/size(TRUTHS)[1]
+    end
+    return s
+end
+# Assumes list of Truths TRUTHS and dataset X, y are defined
+function truthScore(member::PopMember)::Float32
+    return truthScore(member, X, y)
+end
+# Returns the MSE between the tree's predictions and the truth-provided targets for the given dataset
+function truthScore(tree::Node, cX::Array{Float32, 2}, cy::Array{Float32}, truth::Truth)::Float32
+    transformed = transform(cX, truth)
+    targets = truthPrediction(transformed, cy, truth)
+    preds = evalTreeArray(tree, transformed)
+    return MSE(preds, targets)
+end
+# Assumes a dataset X, y for a given truth
+function truthScore(tree::Node, truth::Truth)::Float32
+    return truthScore(tree, X, y, truth)
+end
+# Assumes a list of Truths TRUTHS is defined. Performs the truthScore function for each of them and returns the average
+function truthScore(tree::Node, cX::Array{Float32, 2}, cy::Array{Float32})::Float32
+    s = 0
+    for truth in TRUTHS
+        s += (truthScore(tree, cX, cy, truth))/size(TRUTHS)[1]
+    end
+    return s
+end
+# Assumes list of Truths TRUTHS and dataset X, y are defined
+function truthScore(tree::Node)::Float32
+    return truthScore(tree, X, y)
+end
+# Returns true iff the truth score does not exceed the given threshold, i.e. the truth is satisfied
+function testTruth(member::PopMember, truth::Truth, threshold::Float32=Float32(1.0e-8))::Bool
+    truthError = truthScore(member, truth)
+    #print(stringTree(member.tree), "\n")
+    #print(truth, ": ")
+    #print(truthError, "\n")
+    if truthError > threshold
+        #print("Returns False \n ----\n")
+        return false
+    else
+        #print("Returns True \n ----\n")
+        return true
+    end
+end
+# Returns the truths from the assumed global list TRUTHS that the member's tree violates
+function violatingTruths(member::PopMember)::Array{Truth}
+    return violatingTruths(member.tree)
+end
+# Returns true iff the truth score does not exceed the given threshold, i.e. the truth is satisfied
+function testTruth(tree::Node, truth::Truth, threshold::Float32=Float32(1.0e-3))::Bool
+    truthError = truthScore(tree, truth)
+    if truthError > threshold
+        return false
+    else
+        return true
+    end
+end
+# Returns the truths from the assumed global list TRUTHS that the given tree violates
+function violatingTruths(tree::Node)::Array{Truth}
+    toReturn = []
+    #print("\n Checking Equation ", stringTree(tree), "\n")
+    for truth in TRUTHS
+        test_truth = testTruth(tree, truth)
+        #print("Truth: ", truth, ": " , test_truth, "\n-----\n")
+        if !test_truth
+            append!(toReturn, [truth])
+        end
+    end
+    return toReturn
+end
+function randomIndex(cX::Array{Float32, 2}, k::Integer=10)::Array{Int32, 1}
+    indxs = sample([Int32(i) for i in 1:size(cX)[1]], k)
+    return indxs
+end
+function randomIndex(leng::Integer, k::Integer=10)::Array{Int32, 1}
+    indxs = sample([Int32(i) for i in 1:leng], k)
+    return indxs
+end
+function extendedX(cX::Array{Float32, 2}, truth::Truth, indx::Array{Int32, 1})::Array{Float32, 2}
+    workingcX = copy(cX)
+    X_slice = workingcX[indx, :]
+    X_transformed = transform(X_slice, truth)
+    return X_transformed
+end
+function extendedX(truth::Truth, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
+    return extendedX(OGX, truth, indx)
+end
+function extendedX(cX::Array{Float32, 2}, violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
+    if length(violatedTruths) == 0
+        return nothing
+    end
+    workingX = extendedX(cX, violatedTruths[1], indx)
+    for truth in violatedTruths[2:length(violatedTruths)]
+        workingX = vcat(workingX, extendedX(cX, truth, indx))
+    end
+    return workingX
+end
+function extendedX(violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
+    return extendedX(OGX, violatedTruths, indx)
+end
+function extendedX(tree::Node, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
+    violatedTruths = violatingTruths(tree)
+    return extendedX(violatedTruths, indx)
+end
+function extendedX(member::PopMember, indx::Array{Int32, 1})::Union{Array{Float32, 2}, Nothing}
+    return extendedX(member.tree, indx)
+end
+function extendedy(cX::Array{Float32, 2}, cy::Array{Float32}, truth::Truth, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
+    cy = copy(cy)
+    cX = copy(cX)
+    X_slice = cX[indx, :]
+    y_slice = cy[indx]
+    X_transformed = transform(X_slice, truth)
+    y_transformed = truthPrediction(X_transformed, y_slice, truth)
+    return y_transformed
+end
+function extendedy(truth::Truth, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
+    return extendedy(OGX, OGy, truth, indx)
+end
+function extendedy(cX::Array{Float32, 2}, cy::Array{Float32}, violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
+    if length(violatedTruths) == 0
+        return nothing
+    end
+    workingy = extendedy(cX, cy, violatedTruths[1], indx)
+    for truth in violatedTruths[2:length(violatedTruths)]
+        workingy = vcat(workingy, extendedy(cX, cy, truth, indx))
+    end
+    return workingy
+end
+function extendedy(violatedTruths::Array{Truth}, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
+    return extendedy(OGX,OGy, violatedTruths, indx)
+end
+function extendedy(tree::Node, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
+    violatedTruths = violatingTruths(tree)
+    return extendedy(violatedTruths, indx)
+end
+function extendedy(member::PopMember, indx::Array{Int32, 1})::Union{Array{Float32}, Nothing}
+    return extendedy(member.tree, indx)
+end

pysr/sr.py CHANGED Viewed

@@ -227,11 +227,13 @@ def pysr(X=None, y=None, weights=None,
     # Add pre-defined functions to Julia
     def_hyperparams = predefined_function_addition(binary_operators, def_hyperparams, unary_operators)
     #arbitrary complexity by default
     handle_constraints(binary_operators, constraints, unary_operators)
     constraints_str = make_constraints_str(binary_operators, constraints, unary_operators)
     def_hyperparams = make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators,
                                                  constraints_str, def_hyperparams, equation_file, fast_cycle,
                                                  fractionReplacedHof, hofMigration, limitPowComplexity, maxdepth,

     # Add pre-defined functions to Julia
     def_hyperparams = predefined_function_addition(binary_operators, def_hyperparams, unary_operators)
     #arbitrary complexity by default
     handle_constraints(binary_operators, constraints, unary_operators)
     constraints_str = make_constraints_str(binary_operators, constraints, unary_operators)
     def_hyperparams = make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators,
                                                  constraints_str, def_hyperparams, equation_file, fast_cycle,
                                                  fractionReplacedHof, hofMigration, limitPowComplexity, maxdepth,