MilesCranmer committed on
Commit
333f394
1 Parent(s): 7b7f087

Full docstring

Browse files
Files changed (1) hide show
  1. eureqa.py +51 -47
eureqa.py CHANGED
@@ -51,54 +51,58 @@ def eureqa(X=None, y=None, threads=4,
51
  test='simple1',
52
  maxsize=20,
53
  ):
54
- """ Runs symbolic regression in Julia, to fit y given X.
55
- Either provide a 2D numpy array for X, 1D array for y, or declare a test to run.
56
-
57
- Arguments:
58
-
59
- --threads THREADS Number of threads (default: 4)
60
- --parsimony PARSIMONY
61
- How much to punish complexity (default: 0.001)
62
- --alpha ALPHA Scaling of temperature (default: 10)
63
- --maxsize MAXSIZE Max size of equation (default: 20)
64
- --niterations NITERATIONS
65
- Number of total migration periods (default: 20)
66
- --npop NPOP Number of members per population (default: 100)
67
- --ncyclesperiteration NCYCLESPERITERATION
68
- Number of evolutionary cycles per migration (default:
69
- 5000)
70
- --topn TOPN How many best species to distribute from each
71
- population (default: 10)
72
- --fractionReplacedHof FRACTIONREPLACEDHOF
73
- Fraction of population to replace with hall of fame
74
- (default: 0.1)
75
- --fractionReplaced FRACTIONREPLACED
76
- Fraction of population to replace with best from other
77
- populations (default: 0.1)
78
- --migration MIGRATION
79
- Whether to migrate (default: True)
80
- --hofMigration HOFMIGRATION
81
- Whether to have hall of fame migration (default: True)
82
- --shouldOptimizeConstants SHOULDOPTIMIZECONSTANTS
83
- Whether to use classical optimization on constants
84
- before every migration (doesn't impact performance
85
- that much) (default: True)
86
- --annealing ANNEALING
87
- Whether to use simulated annealing (default: True)
88
- --equation_file EQUATION_FILE
89
- File to dump best equations to (default:
90
- hall_of_fame.csv)
91
- --test TEST Which test to run (default: simple1)
92
- --binary-operators BINARY_OPERATORS [BINARY_OPERATORS ...]
93
- Binary operators. Make sure they are defined in
94
- operators.jl (default: ['plus', 'mult'])
95
- --unary-operators UNARY_OPERATORS
96
- Unary operators. Make sure they are defined in
97
- operators.jl (default: ['exp', 'sin', 'cos'])
98
-
99
- Returns:
100
- Pandas dataset listing (complexity, MSE, equation string)
 
 
 
101
  """
 
102
  rand_string = f'{"".join([str(np.random.rand())[2] for i in range(20)])}'
103
 
104
  if isinstance(binary_operators, str): binary_operators = [binary_operators]
 
51
  test='simple1',
52
  maxsize=20,
53
  ):
54
+ """Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
55
+
56
+ Note: most default parameters have been tuned over several example
57
+ equations, but you should adjust `threads`, `niterations`,
58
+ `binary_operators`, `unary_operators` to your requirements.
59
+
60
+ :X: np.ndarray, 2D. Rows are examples, columns are features.
61
+ :y: np.ndarray, 1D. Rows are examples.
62
+ :threads: Number of threads (=number of populations running).
63
+ You can have more threads than cores - it actually makes it more
64
+ efficient.
65
+ :niterations: Number of iterations of the algorithm to run. The best
66
+ equations are printed, and migrate between populations, at the
67
+ end of each.
68
+ :ncyclesperiteration: Number of total mutations to run, per 10
69
+ samples of the population, per iteration.
70
+ :binary_operators: List of strings giving the binary operators
71
+ in Julia's Base, or in `operators.jl`.
72
+ :unary_operators: Same but for operators taking a single `Float32`.
73
+ :alpha: Initial temperature.
74
+ :annealing: Whether to use annealing. You should (and it is default).
75
+ :fractionReplaced: How much of population to replace with migrating
76
+ equations from other populations.
77
+ :fractionReplacedHof: How much of population to replace with migrating
78
+ equations from hall of fame.
79
+ :npop: Number of individuals in each population
80
+ :parsimony: Multiplicative factor for how much to punish complexity.
81
+ :migration: Whether to migrate.
82
+ :hofMigration: Whether to have the hall of fame migrate.
83
+ :shouldOptimizeConstants: Whether to numerically optimize
84
+ constants (Nelder-Mead/Newton) at the end of each iteration.
85
+ :topn: How many top individuals migrate from each population.
86
+ :weightAddNode: Relative likelihood for mutation to add a node
87
+ :weightDeleteNode: Relative likelihood for mutation to delete a node
88
+ :weightDoNothing: Relative likelihood for mutation to leave the individual unchanged.
89
+ :weightMutateConstant: Relative likelihood for mutation to change
90
+ the constant slightly in a random direction.
91
+ :weightMutateOperator: Relative likelihood for mutation to swap
92
+ an operator.
93
+ :weightRandomize: Relative likelihood for mutation to completely
94
+ delete and then randomly generate the equation
95
+ :weightSimplify: Relative likelihood for mutation to simplify
96
+ constant parts by evaluation
97
+ :timeout: Time in seconds to timeout search
98
+ :equation_file: File to save the best equations to (.csv, with | as the separator).
99
+ :test: What test to run, if X,y not passed.
100
+ :maxsize: Max size of an equation.
101
+ :returns: pd.DataFrame, giving complexity, MSE, and equations
102
+ (as strings).
103
+
104
  """
105
+
106
  rand_string = f'{"".join([str(np.random.rand())[2] for i in range(20)])}'
107
 
108
  if isinstance(binary_operators, str): binary_operators = [binary_operators]