MilesCranmer committed on
Commit
55b1295
1 Parent(s): c2b20b6

Full update of benchmark script

Browse files
Files changed (1) hide show
  1. benchmarks/hyperparamopt.py +136 -88
benchmarks/hyperparamopt.py CHANGED
@@ -2,28 +2,27 @@
2
  import sys
3
  import numpy as np
4
  import pickle as pkl
 
5
  import hyperopt
6
  from hyperopt import hp, fmin, tpe, Trials
7
- import pysr
8
- import time
9
-
10
- import contextlib
11
-
12
-
13
- @contextlib.contextmanager
14
- def temp_seed(seed):
15
- state = np.random.get_state()
16
- np.random.seed(seed)
17
- try:
18
- yield
19
- finally:
20
- np.random.set_state(state)
21
-
22
 
23
  # Change the following code to your file
24
  ################################################################################
25
  TRIALS_FOLDER = "trials"
26
  NUMBER_TRIALS_PER_RUN = 1
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  def run_trial(args):
@@ -33,81 +32,136 @@ def run_trial(args):
33
  :returns: Dict with status and loss from cross-validation
34
 
35
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- print("Running on", args)
38
- args["niterations"] = 100
39
- args["npop"] = 100
40
- args["ncyclesperiteration"] = 1000
41
- args["topn"] = 10
42
- args["parsimony"] = 0.0
43
- args["useFrequency"] = True
44
- args["annealing"] = True
45
-
46
- if args["npop"] < 20 or args["ncyclesperiteration"] < 3:
47
- print("Bad parameters")
48
- return {"status": "ok", "loss": np.inf}
49
-
50
- args["weightDoNothing"] = 1.0
51
  ntrials = 3
 
 
 
 
52
 
53
- with temp_seed(0):
54
- X = np.random.randn(100, 10) * 3
55
 
56
- eval_str = [
57
- "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5",
58
- "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)",
59
- "(np.exp(X[:, 3]) + 3)/(np.abs(X[:, 1]) + np.cos(X[:, 0]) + 1.1)",
60
- "X[:, 0] * np.sin(2*np.pi * (X[:, 1] * X[:, 2] - X[:, 3] / X[:, 4])) + 3.0",
61
- ]
62
 
63
- print("Starting", str(args))
64
- try:
65
- local_trials = []
66
- for i in range(len(eval_str)):
67
- print(f"Starting test {i}")
68
- for j in range(ntrials):
69
- print(f"Starting trial {j}")
70
- y = eval(eval_str[i])
71
- trial = pysr.pysr(
72
- X,
73
- y,
74
- procs=4,
75
- populations=20,
76
- binary_operators=["plus", "mult", "pow", "div"],
77
- unary_operators=["cos", "exp", "sin", "logm", "abs"],
78
- maxsize=25,
79
- constraints={"pow": (-1, 1)},
80
- **args,
81
- )
82
- if len(trial) == 0:
83
- raise ValueError
84
- local_trials.append(
85
- np.min(trial["MSE"]) ** 0.5 / np.std(eval(eval_str[i - 1]))
86
- )
87
- print(f"Test {i} trial {j} with", str(args), f"got {local_trials[-1]}")
88
-
89
- except ValueError:
90
- print("Broken", str(args))
91
- return {"status": "ok", "loss": np.inf} # or 'fail' if nan loss
92
- loss = np.average(local_trials)
93
  print(f"Finished with {loss}", str(args))
94
 
95
  return {"status": "ok", "loss": loss} # or 'fail' if nan loss
96
 
97
 
98
- space = {
99
- "alpha": hp.lognormal("alpha", np.log(10.0), 1.0),
100
- "fractionReplacedHof": hp.lognormal("fractionReplacedHof", np.log(0.1), 1.0),
101
- "fractionReplaced": hp.lognormal("fractionReplaced", np.log(0.1), 1.0),
102
- "perturbationFactor": hp.lognormal("perturbationFactor", np.log(1.0), 1.0),
103
- "weightMutateConstant": hp.lognormal("weightMutateConstant", np.log(4.0), 1.0),
104
- "weightMutateOperator": hp.lognormal("weightMutateOperator", np.log(0.5), 1.0),
105
- "weightAddNode": hp.lognormal("weightAddNode", np.log(0.5), 1.0),
106
- "weightInsertNode": hp.lognormal("weightInsertNode", np.log(0.5), 1.0),
107
- "weightDeleteNode": hp.lognormal("weightDeleteNode", np.log(0.5), 1.0),
108
- "weightSimplify": hp.lognormal("weightSimplify", np.log(0.05), 1.0),
109
- "weightRandomize": hp.lognormal("weightRandomize", np.log(0.25), 1.0),
110
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  ################################################################################
113
 
@@ -178,7 +232,7 @@ while True:
178
  max_evals=n + len(trials.trials),
179
  trials=trials,
180
  verbose=1,
181
- rstate=np.random.RandomState(np.random.randint(1, 10**6)),
182
  )
183
  except hyperopt.exceptions.AllTrialsFailed:
184
  continue
@@ -188,12 +242,6 @@ while True:
188
 
189
  # Merge with empty trials dataset:
190
  save_trials = merge_trials(hyperopt_trial, trials.trials[-n:])
191
- new_fname = (
192
- TRIALS_FOLDER
193
- + "/"
194
- + str(np.random.randint(0, sys.maxsize))
195
- + str(time.time())
196
- + ".pkl"
197
- )
198
  pkl.dump({"trials": save_trials, "n": n}, open(new_fname, "wb"))
199
  loaded_fnames.append(new_fname)
 
2
  import sys
3
  import numpy as np
4
  import pickle as pkl
5
+ from pysr import PySRRegressor
6
  import hyperopt
7
  from hyperopt import hp, fmin, tpe, Trials
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # Change the following code to your file
10
  ################################################################################
11
  TRIALS_FOLDER = "trials"
12
  NUMBER_TRIALS_PER_RUN = 1
13
+ timeout_in_seconds = 5 * 60
14
+
15
+ # Test run to compile everything:
16
+ binary_operators = ["*", "/", "+", "-"]
17
+ unary_operators = ["sin", "cos", "exp", "log"]
18
+ julia_project = None
19
+ model = PySRRegressor(
20
+ binary_operators=binary_operators,
21
+ unary_operators=unary_operators,
22
+ timeout_in_seconds=30,
23
+ julia_project=julia_project,
24
+ )
25
+ model.fit(np.random.randn(100, 3), np.random.randn(100))
26
 
27
 
28
  def run_trial(args):
 
32
  :returns: Dict with status and loss from cross-validation
33
 
34
  """
35
+ # The arguments which are integers:
36
+ integer_args = [
37
+ "populations",
38
+ "niterations",
39
+ "ncyclesperiteration",
40
+ "npop",
41
+ "topn",
42
+ "maxsize",
43
+ "optimizer_nrestarts",
44
+ "optimizer_iterations",
45
+ ]
46
+ # Set these to int types:
47
+ for k, v in args.items():
48
+ if k in integer_args:
49
+ args[k] = int(v)
50
+
51
+ # Duplicate this argument:
52
+ args["tournament_selection_n"] = args["topn"]
53
+
54
+ # Invalid hyperparams:
55
+ invalid = args["npop"] < args["topn"]
56
+ if invalid:
57
+ return dict(status="fail", loss=float("inf"))
58
+
59
+ args["timeout_in_seconds"] = timeout_in_seconds
60
+ args["julia_project"] = julia_project
61
+ args["procs"] = 4
62
+
63
+ # Create the dataset:
64
+ rstate = np.random.RandomState(0)
65
+ X = 3 * rstate.randn(200, 5)
66
+ y = np.cos(2.3 * X[:, 0]) * np.sin(2.3 * X[:, 0] * X[:, 1] * X[:, 2])
67
+
68
+ # Old datasets:
69
+ # eval_str = [
70
+ # "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5",
71
+ # "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)",
72
+ # "(np.exp(X[:, 3]) + 3)/(np.abs(X[:, 1]) + np.cos(X[:, 0]) + 1.1)",
73
+ # "X[:, 0] * np.sin(2*np.pi * (X[:, 1] * X[:, 2] - X[:, 3] / X[:, 4])) + 3.0",
74
+ # ]
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  ntrials = 3
77
+ losses = []
78
+ for i in range(ntrials):
79
+ # Create the model:
80
+ model = PySRRegressor(**args)
81
 
82
+ # Run the model:
83
+ model.fit(X, y)
84
 
85
+ # Compute loss:
86
+ cur_loss = float(model.get_best()["loss"])
87
+ losses.append(cur_loss)
 
 
 
88
 
89
+ loss = np.median(losses)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  print(f"Finished with {loss}", str(args))
91
 
92
  return {"status": "ok", "loss": loss} # or 'fail' if nan loss
93
 
94
 
95
+ space = dict(
96
+ # model_selection="best",
97
+ model_selection=hp.choice("model_selection", ["accuracy"]),
98
+ # binary_operators=None,
99
+ binary_operators=hp.choice("binary_operators", [binary_operators]),
100
+ # unary_operators=None,
101
+ unary_operators=hp.choice("unary_operators", [unary_operators]),
102
+ # populations=100,
103
+ populations=hp.qloguniform("populations", np.log(10), np.log(1000), 1),
104
+ # niterations=4,
105
+ niterations=hp.choice(
106
+ "niterations", [10000]
107
+ ), # We will quit automatically based on a clock.
108
+ # ncyclesperiteration=100,
109
+ ncyclesperiteration=hp.qloguniform(
110
+ "ncyclesperiteration", np.log(10), np.log(5000), 1
111
+ ),
112
+ # alpha=0.1,
113
+ alpha=hp.loguniform("alpha", np.log(0.0001), np.log(1000)),
114
+ # annealing=False,
115
+ annealing=hp.choice("annealing", [False, True]),
116
+ # fractionReplaced=0.01,
117
+ fractionReplaced=hp.loguniform("fractionReplaced", np.log(0.0001), np.log(0.5)),
118
+ # fractionReplacedHof=0.005,
119
+ fractionReplacedHof=hp.loguniform(
120
+ "fractionReplacedHof", np.log(0.0001), np.log(0.5)
121
+ ),
122
+ # npop=100,
123
+ npop=hp.qloguniform("npop", np.log(20), np.log(1000), 1),
124
+ # parsimony=1e-4,
125
+ parsimony=hp.loguniform("parsimony", np.log(0.0001), np.log(0.5)),
126
+ # topn=10,
127
+ topn=hp.qloguniform("topn", np.log(2), np.log(50), 1),
128
+ # weightAddNode=1,
129
+ weightAddNode=hp.loguniform("weightAddNode", np.log(0.0001), np.log(100)),
130
+ # weightInsertNode=3,
131
+ weightInsertNode=hp.loguniform("weightInsertNode", np.log(0.0001), np.log(100)),
132
+ # weightDeleteNode=3,
133
+ weightDeleteNode=hp.loguniform("weightDeleteNode", np.log(0.0001), np.log(100)),
134
+ # weightDoNothing=1,
135
+ weightDoNothing=hp.loguniform("weightDoNothing", np.log(0.0001), np.log(100)),
136
+ # weightMutateConstant=10,
137
+ weightMutateConstant=hp.loguniform(
138
+ "weightMutateConstant", np.log(0.0001), np.log(100)
139
+ ),
140
+ # weightMutateOperator=1,
141
+ weightMutateOperator=hp.loguniform(
142
+ "weightMutateOperator", np.log(0.0001), np.log(100)
143
+ ),
144
+ # weightRandomize=1,
145
+ weightRandomize=hp.loguniform("weightRandomize", np.log(0.0001), np.log(100)),
146
+ # weightSimplify=0.002,
147
+ weightSimplify=hp.choice("weightSimplify", [0.002]), # One of these is fixed.
148
+ # perturbationFactor=1.0,
149
+ perturbationFactor=hp.loguniform("perturbationFactor", np.log(0.0001), np.log(100)),
150
+ # maxsize=20,
151
+ maxsize=hp.choice("maxsize", [20]),
152
+ # warmupMaxsizeBy=0.0,
153
+ warmupMaxsizeBy=hp.uniform("warmupMaxsizeBy", 0.0, 0.5),
154
+ # useFrequency=True,
155
+ useFrequency=hp.choice("useFrequency", [True, False]),
156
+ # optimizer_nrestarts=3,
157
+ optimizer_nrestarts=hp.quniform("optimizer_nrestarts", 1, 10, 1),
158
+ # optimize_probability=1.0,
159
+ optimize_probability=hp.uniform("optimize_probability", 0.0, 1.0),
160
+ # optimizer_iterations=10,
161
+ optimizer_iterations=hp.quniform("optimizer_iterations", 1, 10, 1),
162
+ # tournament_selection_p=1.0,
163
+ tournament_selection_p=hp.uniform("tournament_selection_p", 0.0, 1.0),
164
+ )
165
 
166
  ################################################################################
167
 
 
232
  max_evals=n + len(trials.trials),
233
  trials=trials,
234
  verbose=1,
235
+ rstate=np.random.default_rng(np.random.randint(1, 10 ** 6)),
236
  )
237
  except hyperopt.exceptions.AllTrialsFailed:
238
  continue
 
242
 
243
  # Merge with empty trials dataset:
244
  save_trials = merge_trials(hyperopt_trial, trials.trials[-n:])
245
+ new_fname = TRIALS_FOLDER + "/" + str(np.random.randint(0, sys.maxsize)) + ".pkl"
 
 
 
 
 
 
246
  pkl.dump({"trials": save_trials, "n": n}, open(new_fname, "wb"))
247
  loaded_fnames.append(new_fname)