Spaces:
Sleeping
Sleeping
File size: 7,847 Bytes
cfca8a4 69c3f28 cfca8a4 9b9db9e a3a2513 cfca8a4 a3a2513 cfca8a4 a3a2513 cfca8a4 c27a9c8 a3a2513 c27a9c8 a3a2513 395823a a3a2513 c27a9c8 a3a2513 b66d8de a3a2513 d8f5888 a3a2513 b66d8de cfca8a4 3f4ce91 4c048fc cfca8a4 a3a2513 cfca8a4 a3a2513 cfca8a4 e6db1f3 cfca8a4 ea4213e cfca8a4 a3a2513 cfca8a4 a3a2513 e6db1f3 a3a2513 e6db1f3 a3a2513 cfca8a4 9b9db9e cfca8a4 69c3f28 ea4213e 3f4ce91 a3a2513 ea4213e cfca8a4 395823a cfca8a4 d8f5888 cfca8a4 f1cd245 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import os
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from collections import namedtuple
import pathlib
import numpy as np
import pandas as pd
def eureqa(X=None, y=None, threads=4, parsimony=1e-3, alpha=10,
maxsize=20, migration=True,
hofMigration=True, fractionReplacedHof=0.1,
shouldOptimizeConstants=True,
binary_operators=["plus", "mult"],
unary_operators=["cos", "exp", "sin"],
niterations=20, npop=100, annealing=True,
ncyclesperiteration=5000, fractionReplaced=0.1,
topn=10, equation_file='hall_of_fame.csv',
test='simple1'
):
""" Runs symbolic regression in Julia, to fit y given X.
Either provide a 2D numpy array for X, 1D array for y, or declare a test to run.
Arguments:
--threads THREADS Number of threads (default: 4)
--parsimony PARSIMONY
How much to punish complexity (default: 0.001)
--alpha ALPHA Scaling of temperature (default: 10)
--maxsize MAXSIZE Max size of equation (default: 20)
--niterations NITERATIONS
Number of total migration periods (default: 20)
--npop NPOP Number of members per population (default: 100)
--ncyclesperiteration NCYCLESPERITERATION
Number of evolutionary cycles per migration (default:
5000)
--topn TOPN How many best species to distribute from each
population (default: 10)
--fractionReplacedHof FRACTIONREPLACEDHOF
Fraction of population to replace with hall of fame
(default: 0.1)
--fractionReplaced FRACTIONREPLACED
Fraction of population to replace with best from other
populations (default: 0.1)
--migration MIGRATION
Whether to migrate (default: True)
--hofMigration HOFMIGRATION
Whether to have hall of fame migration (default: True)
--shouldOptimizeConstants SHOULDOPTIMIZECONSTANTS
Whether to use classical optimization on constants
before every migration (doesn't impact performance
that much) (default: True)
--annealing ANNEALING
Whether to use simulated annealing (default: True)
--equation_file EQUATION_FILE
File to dump best equations to (default:
hall_of_fame.csv)
--test TEST Which test to run (default: simple1)
--binary-operators BINARY_OPERATORS [BINARY_OPERATORS ...]
Binary operators. Make sure they are defined in
operators.jl (default: ['plus', 'mult'])
--unary-operators UNARY_OPERATORS
Unary operators. Make sure they are defined in
operators.jl (default: ['exp', 'sin', 'cos'])
Returns:
Pandas dataset listing (complexity, MSE, equation string)
"""
if isinstance(binary_operators, str): binary_operators = [binary_operators]
if isinstance(unary_operators, str): unary_operators = [unary_operators]
if X is None:
if test == 'simple1':
eval_str = "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5"
elif test == 'simple2':
eval_str = "np.sign(X[:, 2])*np.abs(X[:, 2])**3.5 + 1/np.abs(X[:, 0])"
X = np.random.randn(100, 5)*3
y = eval(eval_str)
print("Running on", eval_str)
def_hyperparams = f"""
include("operators.jl")
const binops = {'[' + ', '.join(binary_operators) + ']'}
const unaops = {'[' + ', '.join(unary_operators) + ']'}
const ns=10;
const parsimony = {parsimony:f}f0
const alpha = {alpha:f}f0
const maxsize = {maxsize:d}
const migration = {'true' if migration else 'false'}
const hofMigration = {'true' if hofMigration else 'false'}
const fractionReplacedHof = {fractionReplacedHof}f0
const shouldOptimizeConstants = {'true' if shouldOptimizeConstants else 'false'}
const hofFile = "{equation_file}"
const nthreads = {threads:d}
"""
assert len(X.shape) == 2
assert len(y.shape) == 1
X_str = str(X.tolist()).replace('],', '];').replace(',', '')
y_str = str(y.tolist())
def_datasets = """
const X = convert(Array{Float32, 2}, """f"{X_str})""""
const y = convert(Array{Float32, 1}, """f"{y_str})""""
"""
starting_path = f'cd {pathlib.Path().absolute()}'
code_path = f'cd {pathlib.Path(__file__).parent.absolute()}' #Move to filepath of code
os.system(code_path)
with open('.hyperparams.jl', 'w') as f:
print(def_hyperparams, file=f)
with open('.dataset.jl', 'w') as f:
print(def_datasets, file=f)
command = [
'julia -O3',
f'--threads {threads}',
'-e',
f'\'include("eureqa.jl"); fullRun({niterations:d}, npop={npop:d}, annealing={"true" if annealing else "false"}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, 1e9), topn={topn:d})\'',
]
cur_cmd = ' '.join(command)
print("Running on", cur_cmd)
os.system(cur_cmd)
output = pd.read_csv(equation_file, sep="|")
os.system(starting_path)
return output
if __name__ == "__main__":
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument("--threads", type=int, default=4, help="Number of threads")
parser.add_argument("--parsimony", type=float, default=0.001, help="How much to punish complexity")
parser.add_argument("--alpha", type=int, default=10, help="Scaling of temperature")
parser.add_argument("--maxsize", type=int, default=20, help="Max size of equation")
parser.add_argument("--niterations", type=int, default=20, help="Number of total migration periods")
parser.add_argument("--npop", type=int, default=100, help="Number of members per population")
parser.add_argument("--ncyclesperiteration", type=int, default=5000, help="Number of evolutionary cycles per migration")
parser.add_argument("--topn", type=int, default=10, help="How many best species to distribute from each population")
parser.add_argument("--fractionReplacedHof", type=float, default=0.1, help="Fraction of population to replace with hall of fame")
parser.add_argument("--fractionReplaced", type=float, default=0.1, help="Fraction of population to replace with best from other populations")
parser.add_argument("--migration", type=bool, default=True, help="Whether to migrate")
parser.add_argument("--hofMigration", type=bool, default=True, help="Whether to have hall of fame migration")
parser.add_argument("--shouldOptimizeConstants", type=bool, default=True, help="Whether to use classical optimization on constants before every migration (doesn't impact performance that much)")
parser.add_argument("--annealing", type=bool, default=True, help="Whether to use simulated annealing")
parser.add_argument("--equation_file", type=str, default='hall_of_fame.csv', help="File to dump best equations to")
parser.add_argument("--test", type=str, default='simple1', help="Which test to run")
parser.add_argument(
"--binary-operators", type=str, nargs="+", default=["plus", "mult"],
help="Binary operators. Make sure they are defined in operators.jl")
parser.add_argument(
"--unary-operators", type=str, nargs="+", default=["exp", "sin", "cos"],
help="Unary operators. Make sure they are defined in operators.jl")
args = vars(parser.parse_args()) #dict
eureqa(**args)
|