import os from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from collections import namedtuple import pathlib import numpy as np import pandas as pd def eureqa(X=None, y=None, threads=4, parsimony=1e-3, alpha=10, maxsize=20, migration=True, hofMigration=True, fractionReplacedHof=0.1, shouldOptimizeConstants=True, binary_operators=["plus", "mult"], unary_operators=["cos", "exp", "sin"], niterations=20, npop=100, annealing=True, ncyclesperiteration=5000, fractionReplaced=0.1, topn=10, equation_file='hall_of_fame.csv', test='simple1' ): """ Runs symbolic regression in Julia, to fit y given X. Either provide a 2D numpy array for X, 1D array for y, or declare a test to run. Arguments: --threads THREADS Number of threads (default: 4) --parsimony PARSIMONY How much to punish complexity (default: 0.001) --alpha ALPHA Scaling of temperature (default: 10) --maxsize MAXSIZE Max size of equation (default: 20) --niterations NITERATIONS Number of total migration periods (default: 20) --npop NPOP Number of members per population (default: 100) --ncyclesperiteration NCYCLESPERITERATION Number of evolutionary cycles per migration (default: 5000) --topn TOPN How many best species to distribute from each population (default: 10) --fractionReplacedHof FRACTIONREPLACEDHOF Fraction of population to replace with hall of fame (default: 0.1) --fractionReplaced FRACTIONREPLACED Fraction of population to replace with best from other populations (default: 0.1) --migration MIGRATION Whether to migrate (default: True) --hofMigration HOFMIGRATION Whether to have hall of fame migration (default: True) --shouldOptimizeConstants SHOULDOPTIMIZECONSTANTS Whether to use classical optimization on constants before every migration (doesn't impact performance that much) (default: True) --annealing ANNEALING Whether to use simulated annealing (default: True) --equation_file EQUATION_FILE File to dump best equations to (default: hall_of_fame.csv) --test TEST Which test to run (default: simple1) --binary-operators BINARY_OPERATORS [BINARY_OPERATORS ...] Binary operators. Make sure they are defined in operators.jl (default: ['plus', 'mul']) --unary-operators UNARY_OPERATORS Unary operators. Make sure they are defined in operators.jl (default: ['exp', 'sin', 'cos']) Returns: Pandas dataset listing (complexity, MSE, equation string) """ if isinstance(binary_operators, str): binary_operators = [binary_operators] if isinstance(unary_operators, str): unary_operators = [unary_operators] if X is None: if test == 'simple1': eval_str = "X[:, 2]**2 + np.cos(X[:, 3]) - 5" elif test == 'simple2': eval_str = "np.sign(X[:, 2])*np.abs(X[:, 2])**3.5 + 1/np.abs(X[:, 0])" X = np.random.randn(100, 5)*3 y = eval(eval_str) print("Runing on", eval_str) def_hyperparams = f""" include("operators.jl") const binops = {'[' + ', '.join(binary_operators) + ']'} const unaops = {'[' + ', '.join(unary_operators) + ']'} const ns=10; const parsimony = {parsimony:f}f0 const alpha = {alpha:f}f0 const maxsize = {maxsize:d} const migration = {'true' if migration else 'false'} const hofMigration = {'true' if hofMigration else 'false'} const fractionReplacedHof = {fractionReplacedHof}f0 const shouldOptimizeConstants = {'true' if shouldOptimizeConstants else 'false'} const hofFile = "{equation_file}" const nthreads = {threads:d} """ assert len(X.shape) == 2 assert len(y.shape) == 1 X_str = str(X.tolist()).replace('],', '];').replace(',', '') y_str = str(y.tolist()) def_datasets = """ const X = convert(Array{Float32, 2}, """f"{X_str})"""" const y = convert(Array{Float32, 1}, """f"{y_str})"""" """ with open('.hyperparams.jl', 'w') as f: print(def_hyperparams, file=f) with open('.dataset.jl', 'w') as f: print(def_datasets, file=f) command = [ f'cd {pathlib.Path(__file__).parent.absolute()}', #Move to filepath of code '&&', 'julia -O3', f'--threads {threads}', '-e', f'\'include("eureqa.jl"); fullRun({niterations:d}, npop={npop:d}, annealing={"true" if annealing else "false"}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, 1e9), topn={topn:d})\'', '&&', f'cd {pathlib.Path().absolute()}', ] import os cur_cmd = ' '.join(command[:-2]) print("Running on", cur_cmd) os.system(cur_cmd) output = pd.read_csv(equation_file, sep="|") os.system(command[-1]) return output if __name__ == "__main__": parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument("--threads", type=int, default=4, help="Number of threads") parser.add_argument("--parsimony", type=float, default=0.001, help="How much to punish complexity") parser.add_argument("--alpha", type=int, default=10, help="Scaling of temperature") parser.add_argument("--maxsize", type=int, default=20, help="Max size of equation") parser.add_argument("--niterations", type=int, default=20, help="Number of total migration periods") parser.add_argument("--npop", type=int, default=100, help="Number of members per population") parser.add_argument("--ncyclesperiteration", type=int, default=5000, help="Number of evolutionary cycles per migration") parser.add_argument("--topn", type=int, default=10, help="How many best species to distribute from each population") parser.add_argument("--fractionReplacedHof", type=float, default=0.1, help="Fraction of population to replace with hall of fame") parser.add_argument("--fractionReplaced", type=float, default=0.1, help="Fraction of population to replace with best from other populations") parser.add_argument("--migration", type=bool, default=True, help="Whether to migrate") parser.add_argument("--hofMigration", type=bool, default=True, help="Whether to have hall of fame migration") parser.add_argument("--shouldOptimizeConstants", type=bool, default=True, help="Whether to use classical optimization on constants before every migration (doesn't impact performance that much)") parser.add_argument("--annealing", type=bool, default=True, help="Whether to use simulated annealing") parser.add_argument("--equation_file", type=str, default='hall_of_fame.csv', help="File to dump best equations to") parser.add_argument("--test", type=str, default='simple1', help="Which test to run") parser.add_argument( "--binary-operators", type=str, nargs="+", default=["plus", "mul"], help="Binary operators. Make sure they are defined in operators.jl") parser.add_argument( "--unary-operators", type=str, nargs="+", default=["exp", "sin", "cos"], help="Unary operators. Make sure they are defined in operators.jl") args = vars(parser.parse_args()) #dict eureqa(**args)