Dhananjay Ashok committed on
Commit
4949116
·
unverified ·
2 Parent(s): e30b74d d7fdb9c

Merge pull request #2 from DhananjayAshok/refactoring

Browse files

Refactoring of sr.py and integration with Miles's changes last night

Files changed (1) hide show
  1. pysr/sr.py +252 -182
pysr/sr.py CHANGED
@@ -191,15 +191,9 @@ def pysr(X=None, y=None, weights=None,
191
  (as strings).
192
 
193
  """
194
- if threads is not None:
195
- raise ValueError("The threads kwarg is deprecated. Use procs.")
196
- if limitPowComplexity:
197
- raise ValueError("The limitPowComplexity kwarg is deprecated. Use constraints.")
198
- if maxdepth is None:
199
- maxdepth = maxsize
200
- if equation_file is None:
201
- date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
202
- equation_file = 'hall_of_fame_' + date_time + '.csv'
203
 
204
  if isinstance(X, pd.DataFrame):
205
  variable_names = list(X.columns)
@@ -210,128 +204,144 @@ def pysr(X=None, y=None, weights=None,
210
  if len(X.shape) == 1:
211
  X = X[:, None]
212
 
213
- # Check for potential errors before they happen
214
- assert len(unary_operators) + len(binary_operators) > 0
215
- assert len(X.shape) == 2
216
- assert len(y.shape) == 1
217
- assert X.shape[0] == y.shape[0]
218
- if weights is not None:
219
- assert len(weights.shape) == 1
220
- assert X.shape[0] == weights.shape[0]
221
- if use_custom_variable_names:
222
- assert len(variable_names) == X.shape[1]
223
 
224
- if select_k_features is not None:
225
- selection = run_feature_selection(X, y, select_k_features)
226
- print(f"Using features {selection}")
227
- X = X[:, selection]
228
-
229
- if use_custom_variable_names:
230
- variable_names = [variable_names[selection[i]] for i in range(len(selection))]
231
 
 
 
 
 
 
232
  if populations is None:
233
  populations = procs
234
-
235
- if isinstance(binary_operators, str): binary_operators = [binary_operators]
236
- if isinstance(unary_operators, str): unary_operators = [unary_operators]
237
-
238
  if X is None:
239
- if test == 'simple1':
240
- eval_str = "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5"
241
- elif test == 'simple2':
242
- eval_str = "np.sign(X[:, 2])*np.abs(X[:, 2])**3.5 + 1/(np.abs(X[:, 0])+1)"
243
- elif test == 'simple3':
244
- eval_str = "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)"
245
- elif test == 'simple4':
246
- eval_str = "1.0 + 3*X[:, 0]**2 - 0.5*X[:, 0]**3 + 0.1*X[:, 0]**4"
247
- elif test == 'simple5':
248
- eval_str = "(np.exp(X[:, 3]) + 3)/(np.abs(X[:, 1]) + np.cos(X[:, 0]) + 1.1)"
249
-
250
- X = np.random.randn(100, 5)*3
251
- y = eval(eval_str)
252
- print("Running on", eval_str)
253
-
254
- # System-independent paths
255
- pkg_directory = Path(__file__).parents[1] / 'julia'
256
- pkg_filename = pkg_directory / "sr.jl"
257
- operator_filename = pkg_directory / "Operators.jl"
258
- julia_auxiliaries = [
259
- "Equation.jl", "ProgramConstants.jl",
260
- "LossFunctions.jl", "Utils.jl", "EvaluateEquation.jl",
261
- "MutationFunctions.jl", "SimplifyEquation.jl", "PopMember.jl",
262
- "HallOfFame.jl", "CheckConstraints.jl", "Mutate.jl",
263
- "Population.jl", "RegularizedEvolution.jl", "SingleIteration.jl",
264
- "ConstantOptimization.jl"
265
- ]
266
- julia_auxiliary_filenames = [
267
- pkg_directory / fname
268
- for fname in julia_auxiliaries
269
- ]
270
-
271
- tmpdir = Path(tempfile.mkdtemp(dir=tempdir))
272
- hyperparam_filename = tmpdir / f'hyperparams.jl'
273
- dataset_filename = tmpdir / f'dataset.jl'
274
- auxiliary_filename = tmpdir / f'auxiliary.jl'
275
- runfile_filename = tmpdir / f'runfile.jl'
276
- X_filename = tmpdir / "X.csv"
277
- y_filename = tmpdir / "y.csv"
278
- weights_filename = tmpdir / "weights.csv"
279
 
280
  def_hyperparams = ""
281
 
282
  # Add pre-defined functions to Julia
283
- for op_list in [binary_operators, unary_operators]:
284
- for i in range(len(op_list)):
285
- op = op_list[i]
286
- is_user_defined_operator = '(' in op
287
 
288
- if is_user_defined_operator:
289
- def_hyperparams += op + "\n"
290
- # Cut off from the first non-alphanumeric char:
291
- first_non_char = [
292
- j for j in range(len(op))
293
- if not (op[j].isalpha() or op[j].isdigit())][0]
294
- function_name = op[:first_non_char]
295
- op_list[i] = function_name
296
 
297
  #arbitrary complexity by default
298
- for op in unary_operators:
299
- if op not in constraints:
300
- constraints[op] = -1
301
- for op in binary_operators:
302
- if op not in constraints:
303
- constraints[op] = (-1, -1)
304
- if op in ['plus', 'sub']:
305
- if constraints[op][0] != constraints[op][1]:
306
- raise NotImplementedError("You need equal constraints on both sides for - and *, due to simplification strategies.")
307
- elif op == 'mult':
308
- # Make sure the complex expression is in the left side.
309
- if constraints[op][0] == -1:
310
- continue
311
- elif constraints[op][1] == -1 or constraints[op][0] < constraints[op][1]:
312
- constraints[op][0], constraints[op][1] = constraints[op][1], constraints[op][0]
313
 
314
- constraints_str = "const una_constraints = ["
315
- first = True
316
- for op in unary_operators:
317
- val = constraints[op]
318
- if not first:
319
- constraints_str += ", "
320
- constraints_str += f"{val:d}"
321
- first = False
322
 
323
- constraints_str += """]
324
- const bin_constraints = ["""
325
 
326
- first = True
327
- for op in binary_operators:
328
- tup = constraints[op]
329
- if not first:
330
- constraints_str += ", "
331
- constraints_str += f"({tup[0]:d}, {tup[1]:d})"
332
- first = False
333
- constraints_str += "]"
 
 
 
 
334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
336
  {constraints_str}
337
  const binops = {'[' + ', '.join(binary_operators) + ']'}
@@ -370,7 +380,6 @@ const warmupMaxsize = {warmupMaxsize:d}
370
  const limitPowComplexity = {"true" if limitPowComplexity else "false"}
371
  const useFrequency = {"true" if useFrequency else "false"}
372
  """
373
-
374
  op_runner = ""
375
  if len(binary_operators) > 0:
376
  op_runner += """
@@ -381,14 +390,13 @@ const useFrequency = {"true" if useFrequency else "false"}
381
  end"""
382
  for i in range(1, len(binary_operators)):
383
  op_runner += f"""
384
- elseif i === {i+1}
385
  @inbounds @simd for j=1:clen
386
  x[j] = {binary_operators[i]}(x[j], y[j])
387
  end"""
388
  op_runner += """
389
  end
390
  end"""
391
-
392
  if len(unary_operators) > 0:
393
  op_runner += """
394
  @inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
@@ -398,93 +406,155 @@ end"""
398
  end"""
399
  for i in range(1, len(unary_operators)):
400
  op_runner += f"""
401
- elseif i === {i+1}
402
  @inbounds @simd for j=1:clen
403
  x[j] = {unary_operators[i]}(x[j])
404
  end"""
405
  op_runner += """
406
  end
407
  end"""
408
-
409
  def_hyperparams += op_runner
 
 
 
 
410
 
411
- def_auxiliary = '\n'.join([
412
- f"""include("{_escape_filename(aux_fname)}")""" for aux_fname in julia_auxiliary_filenames
413
- ])
414
-
415
- def_datasets = """using DelimitedFiles"""
416
-
417
- np.savetxt(X_filename, X, delimiter=',')
418
- np.savetxt(y_filename, y, delimiter=',')
419
- if weights is not None:
420
- np.savetxt(weights_filename, weights, delimiter=',')
421
 
422
- def_datasets += f"""
423
- const X = readdlm("{_escape_filename(X_filename)}", ',', Float32, '\\n')
424
- const y = readdlm("{_escape_filename(y_filename)}", ',', Float32, '\\n')"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
 
426
- if weights is not None:
427
- def_datasets += f"""
428
- const weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')"""
429
 
430
- if use_custom_variable_names:
431
- def_hyperparams += f"""
432
- const varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
 
434
- with open(hyperparam_filename, 'w') as f:
435
- print(def_hyperparams, file=f)
436
 
437
- with open(dataset_filename, 'w') as f:
438
- print(def_datasets, file=f)
 
 
 
439
 
440
- with open(auxiliary_filename, 'w') as f:
441
- print(def_auxiliary, file=f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
 
443
- with open(runfile_filename, 'w') as f:
444
- print(f'@everywhere include("{_escape_filename(hyperparam_filename)}")', file=f)
445
- print(f'@everywhere include("{_escape_filename(dataset_filename)}")', file=f)
446
- print(f'@everywhere include("{_escape_filename(auxiliary_filename)}")', file=f)
447
- print(f'@everywhere include("{_escape_filename(pkg_filename)}")', file=f)
448
- print(f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})', file=f)
449
- print(f'rmprocs(nprocs)', file=f)
450
 
451
 
452
- command = [
453
- f'julia', f'-O{julia_optimization:d}',
454
- f'-p', f'{procs}',
455
- str(runfile_filename),
456
- ]
457
- if timeout is not None:
458
- command = [f'timeout', f'{timeout}'] + command
 
 
 
 
 
 
 
 
 
 
459
 
460
- global global_n_features
461
- global global_equation_file
462
- global global_variable_names
463
- global global_extra_sympy_mappings
464
 
465
- global_n_features = X.shape[1]
466
- global_equation_file = equation_file
467
- global_variable_names = variable_names
468
- global_extra_sympy_mappings = extra_sympy_mappings
 
 
 
 
 
 
469
 
470
- print("Running on", ' '.join(command))
471
- process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1, shell=True)
472
- try:
473
- while True:
474
- line = process.stdout.readline()
475
- if not line: break
476
- print(line.decode('utf-8').replace('\n', ''))
477
 
478
- process.stdout.close()
479
- process.wait()
480
- except KeyboardInterrupt:
481
- print("Killing process... will return when done.")
482
- process.kill()
 
 
 
 
 
 
483
 
484
- if delete_tempfiles:
485
- shutil.rmtree(tmpdir)
486
 
487
- return get_hof()
 
 
 
 
488
 
489
 
490
  def run_feature_selection(X, y, select_k_features):
 
191
  (as strings).
192
 
193
  """
194
+ raise_depreciation_errors(limitPowComplexity, threads)
195
+ auxiliary_filename, X_filename, dataset_filename, hyperparam_filename, julia_auxiliary_filenames, operator_filename \
196
+ ,pkg_filename, runfile_filename, tmpdir, weights_filename, y_filename = set_paths(tempdir)
 
 
 
 
 
 
197
 
198
  if isinstance(X, pd.DataFrame):
199
  variable_names = list(X.columns)
 
204
  if len(X.shape) == 1:
205
  X = X[:, None]
206
 
207
+ check_assertions(X, binary_operators, unary_operators, use_custom_variable_names, variable_names, weights, y)
 
 
 
 
 
 
 
 
 
208
 
209
+ X, variable_names = handle_feature_selection(X, select_k_features, use_custom_variable_names, variable_names, y)
 
 
 
 
 
 
210
 
211
+ if maxdepth is None:
212
+ maxdepth = maxsize
213
+ if equation_file is None:
214
+ date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
215
+ equation_file = 'hall_of_fame_' + date_time + '.csv'
216
  if populations is None:
217
  populations = procs
218
+ if isinstance(binary_operators, str):
219
+ binary_operators = [binary_operators]
220
+ if isinstance(unary_operators, str):
221
+ unary_operators = [unary_operators]
222
  if X is None:
223
+ X, y = using_test_input(X, test, y)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
  def_hyperparams = ""
226
 
227
  # Add pre-defined functions to Julia
228
+ def_hyperparams = predefined_function_addition(binary_operators, def_hyperparams, unary_operators)
 
 
 
229
 
 
 
 
 
 
 
 
 
230
 
231
  #arbitrary complexity by default
232
+ handle_constraints(binary_operators, constraints, unary_operators)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
+ constraints_str = make_constraints_str(binary_operators, constraints, unary_operators)
 
 
 
 
 
 
 
235
 
 
 
236
 
237
+ def_hyperparams = make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators,
238
+ constraints_str, def_hyperparams, equation_file, fast_cycle,
239
+ fractionReplacedHof, hofMigration, limitPowComplexity, maxdepth,
240
+ maxsize, migration, nrestarts, operator_filename, parsimony,
241
+ perturbationFactor, populations, procs, shouldOptimizeConstants,
242
+ unary_operators, useFrequency, use_custom_variable_names, variable_names,
243
+ warmupMaxsize, weightAddNode, weightDeleteNode, weightDoNothing,
244
+ weightInsertNode, weightMutateConstant, weightMutateOperator,
245
+ weightRandomize, weightSimplify, weights)
246
+ def_auxiliary = make_auxiliary_julia_str(julia_auxiliary_filenames)
247
+
248
+ def_datasets = make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename)
249
 
250
+ create_julia_files(auxiliary_filename, dataset_filename, def_auxiliary, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
251
+ ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity)
252
+
253
+ final_pysr_process(julia_optimization, procs, runfile_filename, timeout)
254
+
255
+ set_globals(X, equation_file, extra_sympy_mappings, variable_names)
256
+
257
+ if delete_tempfiles:
258
+ shutil.rmtree(tmpdir)
259
+
260
+ return get_hof()
261
+
262
+
263
def make_auxiliary_julia_str(julia_auxiliary_filenames):
    """Build the Julia source that pulls in every auxiliary file.

    Each entry of ``julia_auxiliary_filenames`` is passed through
    ``_escape_filename`` and wrapped in an ``include("...")`` statement;
    the statements are returned joined by newlines.
    """
    include_statements = []
    for aux_path in julia_auxiliary_filenames:
        include_statements.append(f'include("{_escape_filename(aux_path)}")')
    return '\n'.join(include_statements)
268
+
269
+
270
def set_globals(X, equation_file, extra_sympy_mappings, variable_names):
    """Record the settings of the current run in module-level globals.

    Stores the number of columns of ``X`` (``X.shape[1]``), the equation
    file, the variable names and the extra sympy mappings in
    ``global_n_features``, ``global_equation_file``,
    ``global_variable_names`` and ``global_extra_sympy_mappings``.
    """
    global global_n_features, global_equation_file
    global global_variable_names, global_extra_sympy_mappings

    # Read the feature count first so a bad ``X`` fails before anything is stored.
    n_features = X.shape[1]
    global_n_features = n_features
    global_equation_file = equation_file
    global_variable_names = variable_names
    global_extra_sympy_mappings = extra_sympy_mappings
279
+
280
+
281
def final_pysr_process(julia_optimization, procs, runfile_filename, timeout):
    """Launch Julia on the generated run file and echo its output.

    The command is ``julia -O<julia_optimization> -p <procs> <runfile>``,
    prefixed with ``timeout <timeout>`` when ``timeout`` is not None.
    Output is read line by line from the child's stdout and printed until
    the stream ends. A ``KeyboardInterrupt`` kills the child process.
    """
    import os  # local import: only used to choose the platform's shell mode

    command = [
        'julia', f'-O{julia_optimization:d}',
        '-p', f'{procs}',
        str(runfile_filename),
    ]
    if timeout is not None:
        command = ['timeout', f'{timeout}'] + command
    print("Running on", ' '.join(command))

    # With shell=True and a list on POSIX, only the first item is the command;
    # the remaining items become arguments of the shell itself, so Julia would
    # never receive the run file. Keep the shell on Windows only, where the
    # list is joined into one command line.
    process = subprocess.Popen(
        command, stdout=subprocess.PIPE, bufsize=1, shell=(os.name == 'nt'))
    try:
        while True:
            line = process.stdout.readline()
            if not line:
                break
            print(line.decode('utf-8').replace('\n', ''))

        process.stdout.close()
        process.wait()
    except KeyboardInterrupt:
        print("Killing process... will return when done.")
        process.kill()
302
+
303
+
304
def create_julia_files(auxiliary_filename, dataset_filename, def_auxiliary, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
                       ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity):
    """Write the generated Julia sources and the run file to disk.

    The hyperparameter, dataset and auxiliary definitions are each written
    to their own file. The run file then includes those three files and the
    package file on every process (``@everywhere``), calls ``fullRun`` with
    the search settings, and finally calls ``rmprocs(nprocs)``.
    """
    def _write_text(path, text):
        # print() appends the trailing newline.
        with open(path, 'w') as handle:
            print(text, file=handle)

    _write_text(hyperparam_filename, def_hyperparams)
    _write_text(dataset_filename, def_datasets)
    _write_text(auxiliary_filename, def_auxiliary)

    with open(runfile_filename, 'w') as runfile:
        included = (hyperparam_filename, dataset_filename, auxiliary_filename, pkg_filename)
        for path in included:
            print(f'@everywhere include("{_escape_filename(path)}")', file=runfile)
        full_run_call = f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})'
        print(full_run_call, file=runfile)
        print('rmprocs(nprocs)', file=runfile)
321
+
322
+
323
def make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename):
    """Save the data as CSV and return the Julia code that loads it.

    ``X`` and ``y`` (and ``weights`` when it is not None) are written with
    ``np.savetxt`` using a comma delimiter. The returned Julia source starts
    with ``using DelimitedFiles`` and defines ``X``, ``y`` (and ``weights``)
    as constants read back with ``readdlm`` as ``Float32``.
    """
    has_weights = weights is not None

    # Write the data files first, then build the Julia statements.
    np.savetxt(X_filename, X, delimiter=',')
    np.savetxt(y_filename, y, delimiter=',')
    if has_weights:
        np.savetxt(weights_filename, weights, delimiter=',')

    julia_lines = [
        "using DelimitedFiles",
        f"""const X = readdlm("{_escape_filename(X_filename)}", ',', Float32, '\\n')""",
        f"""const y = readdlm("{_escape_filename(y_filename)}", ',', Float32, '\\n')""",
    ]
    if has_weights:
        julia_lines.append(
            f"""const weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')""")
    return "\n".join(julia_lines)
336
+
337
+
338
+ def make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators, constraints_str,
339
+ def_hyperparams, equation_file, fast_cycle, fractionReplacedHof, hofMigration,
340
+ limitPowComplexity, maxdepth, maxsize, migration, nrestarts, operator_filename,
341
+ parsimony, perturbationFactor, populations, procs, shouldOptimizeConstants,
342
+ unary_operators, useFrequency, use_custom_variable_names, variable_names, warmupMaxsize, weightAddNode,
343
+ weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
344
+ weightMutateOperator, weightRandomize, weightSimplify, weights):
345
  def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
346
  {constraints_str}
347
  const binops = {'[' + ', '.join(binary_operators) + ']'}
 
380
  const limitPowComplexity = {"true" if limitPowComplexity else "false"}
381
  const useFrequency = {"true" if useFrequency else "false"}
382
  """
 
383
  op_runner = ""
384
  if len(binary_operators) > 0:
385
  op_runner += """
 
390
  end"""
391
  for i in range(1, len(binary_operators)):
392
  op_runner += f"""
393
+ elseif i === {i + 1}
394
  @inbounds @simd for j=1:clen
395
  x[j] = {binary_operators[i]}(x[j], y[j])
396
  end"""
397
  op_runner += """
398
  end
399
  end"""
 
400
  if len(unary_operators) > 0:
401
  op_runner += """
402
  @inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
 
406
  end"""
407
  for i in range(1, len(unary_operators)):
408
  op_runner += f"""
409
+ elseif i === {i + 1}
410
  @inbounds @simd for j=1:clen
411
  x[j] = {unary_operators[i]}(x[j])
412
  end"""
413
  op_runner += """
414
  end
415
  end"""
 
416
  def_hyperparams += op_runner
417
+ if use_custom_variable_names:
418
+ def_hyperparams += f"""
419
+ const varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
420
+ return def_hyperparams
421
 
 
 
 
 
 
 
 
 
 
 
422
 
423
def make_constraints_str(binary_operators, constraints, unary_operators):
    """Render the operator constraints as two Julia constant definitions.

    ``una_constraints`` lists the integer constraint of every unary
    operator; ``bin_constraints`` lists a ``(left, right)`` pair for every
    binary operator. Both follow the order of the operator lists, and every
    operator must already have an entry in ``constraints``.
    """
    unary_items = ", ".join(f"{constraints[op]:d}" for op in unary_operators)
    binary_items = ", ".join(
        f"({constraints[op][0]:d}, {constraints[op][1]:d})"
        for op in binary_operators
    )
    return (
        "const una_constraints = [" + unary_items + "]\n"
        + "const bin_constraints = [" + binary_items + "]"
    )
443
 
 
 
 
444
 
445
def handle_constraints(binary_operators, constraints, unary_operators):
    """Fill in default complexity constraints and normalise them in place.

    Unary operators missing from ``constraints`` get ``-1``; binary
    operators missing from it get ``(-1, -1)``.

    For ``plus`` and ``sub`` the two sides must carry the same constraint.
    For ``mult`` the pair is reordered when needed: a pair whose left value
    is ``-1`` is left alone; otherwise the sides are swapped when the right
    value is ``-1`` or larger than the left one.

    Raises:
        NotImplementedError: if ``plus`` or ``sub`` has different
            constraints on its two sides.
    """
    for op in unary_operators:
        if op not in constraints:
            constraints[op] = -1

    for op in binary_operators:
        if op not in constraints:
            constraints[op] = (-1, -1)
        pair = constraints[op]
        left, right = pair[0], pair[1]

        if op in ('plus', 'sub'):
            if left != right:
                raise NotImplementedError(
                    "You need equal constraints on both sides for + and -, due to simplification strategies.")
        elif op == 'mult':
            # Make sure the complex expression is in the left side.
            if left == -1:
                continue
            if right == -1 or left < right:
                if isinstance(pair, list):
                    # Lists are mutable: keep swapping them in place.
                    pair[0], pair[1] = right, left
                else:
                    # Tuples (including the default) reject item assignment,
                    # so store a new swapped pair instead.
                    constraints[op] = (right, left)
462
 
 
 
463
 
464
def predefined_function_addition(binary_operators, def_hyperparams, unary_operators):
    """Move user-defined operator definitions into the Julia hyperparameters.

    An operator string containing ``(`` is treated as a user-supplied
    definition. Its full text plus a newline is appended to
    ``def_hyperparams``, and its entry in the operator list is replaced,
    in place, by the leading run of letters and digits (the function name).
    Binary operators are processed before unary ones.

    Returns the extended ``def_hyperparams`` string.
    """
    for operators in (binary_operators, unary_operators):
        for index, op in enumerate(operators):
            if '(' not in op:
                continue
            def_hyperparams += op + "\n"
            # The name ends at the first character that is neither a letter nor a digit.
            name_end = next(
                pos for pos, char in enumerate(op)
                if not (char.isalpha() or char.isdigit())
            )
            operators[index] = op[:name_end]
    return def_hyperparams
479
+
480
+
481
def using_test_input(X, test, y):
    """Generate a random dataset for one of the built-in test problems.

    ``X`` is replaced by a ``(100, 5)`` array of standard-normal samples
    scaled by 3, and ``y`` by the value of the expression selected through
    ``test`` (``'simple1'`` to ``'simple5'``). The incoming ``X`` and ``y``
    are ignored. The chosen expression is printed.

    Returns:
        The ``(X, y)`` pair.

    Raises:
        ValueError: if ``test`` is not one of the known test names.
    """
    expressions = {
        'simple1': "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5",
        'simple2': "np.sign(X[:, 2])*np.abs(X[:, 2])**3.5 + 1/(np.abs(X[:, 0])+1)",
        'simple3': "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)",
        'simple4': "1.0 + 3*X[:, 0]**2 - 0.5*X[:, 0]**3 + 0.1*X[:, 0]**4",
        'simple5': "(np.exp(X[:, 3]) + 3)/(np.abs(X[:, 1]) + np.cos(X[:, 0]) + 1.1)",
    }
    if test not in expressions:
        # Previously an unknown name fell through every branch and failed
        # later with an unbound-variable error.
        raise ValueError(
            f"Unknown test {test!r}; expected one of {sorted(expressions)}.")

    eval_str = expressions[test]
    X = np.random.randn(100, 5) * 3
    # eval only ever sees the hard-coded expressions above.
    y = eval(eval_str, {"np": np}, {"X": X})
    print("Running on", eval_str)
    return X, y
496
+
497
+
498
def handle_feature_selection(X, select_k_features, use_custom_variable_names, variable_names, y):
    """Optionally restrict ``X`` to a selected subset of its columns.

    When ``select_k_features`` is None the inputs are returned untouched.
    Otherwise ``run_feature_selection`` picks the columns, the choice is
    printed, ``X`` is sliced down to them, and, if custom variable names
    are in use, ``variable_names`` is reduced to the matching entries.

    Returns the ``(X, variable_names)`` pair.
    """
    if select_k_features is None:
        return X, variable_names

    selection = run_feature_selection(X, y, select_k_features)
    print(f"Using features {selection}")
    X = X[:, selection]

    if use_custom_variable_names:
        variable_names = [variable_names[column] for column in selection]
    return X, variable_names
 
 
 
 
507
 
508
 
509
def set_paths(tempdir):
    """Resolve the Julia source paths and create a fresh working directory.

    The Julia sources are looked up in the ``julia`` directory located two
    levels above this file. A new temporary directory is created with
    ``tempfile.mkdtemp(dir=tempdir)`` to hold the generated files.

    Returns an 11-tuple, in this order: auxiliary file, X csv, dataset
    file, hyperparameter file, list of auxiliary Julia sources, operators
    file, package file, run file, temporary directory, weights csv, y csv.
    """
    # System-independent paths
    julia_dir = Path(__file__).parents[1] / 'julia'
    auxiliary_names = (
        "Equation.jl", "ProgramConstants.jl",
        "LossFunctions.jl", "Utils.jl", "EvaluateEquation.jl",
        "MutationFunctions.jl", "SimplifyEquation.jl", "PopMember.jl",
        "HallOfFame.jl", "CheckConstraints.jl", "Mutate.jl",
        "Population.jl", "RegularizedEvolution.jl", "SingleIteration.jl",
        "ConstantOptimization.jl",
    )
    julia_auxiliary_filenames = [julia_dir / name for name in auxiliary_names]

    tmpdir = Path(tempfile.mkdtemp(dir=tempdir))
    return (
        tmpdir / 'auxiliary.jl',
        tmpdir / "X.csv",
        tmpdir / 'dataset.jl',
        tmpdir / 'hyperparams.jl',
        julia_auxiliary_filenames,
        julia_dir / "Operators.jl",
        julia_dir / "sr.jl",
        tmpdir / 'runfile.jl',
        tmpdir,
        tmpdir / "weights.csv",
        tmpdir / "y.csv",
    )
538
 
 
 
 
 
 
 
 
539
 
540
def check_assertions(X, binary_operators, unary_operators, use_custom_variable_names, variable_names, weights, y):
    """Check the inputs for potential errors before they happen.

    Verifies that at least one operator is given, that ``X`` is 2-D and
    ``y`` is 1-D with one entry per row of ``X``, that ``weights`` (when
    not None) is 1-D with one entry per row of ``X``, and that the number
    of custom variable names matches the number of columns of ``X``.

    Raises:
        AssertionError: with a descriptive message when a check fails.
    """
    assert len(unary_operators) + len(binary_operators) > 0, \
        "At least one unary or binary operator is required."
    assert len(X.shape) == 2, "X must be a 2-dimensional array."
    assert len(y.shape) == 1, "y must be a 1-dimensional array."
    assert X.shape[0] == y.shape[0], \
        "X and y must have the same number of rows."
    if weights is not None:
        assert len(weights.shape) == 1, "weights must be a 1-dimensional array."
        assert X.shape[0] == weights.shape[0], \
            "weights must have one entry per row of X."
    if use_custom_variable_names:
        assert len(variable_names) == X.shape[1], \
            "variable_names must have one entry per column of X."
551
 
 
 
552
 
553
def raise_depreciation_errors(limitPowComplexity, threads):
    """Reject keyword arguments that are no longer supported.

    Raises:
        ValueError: if ``threads`` is not None, or if
            ``limitPowComplexity`` is truthy. ``threads`` is checked first.
    """
    if threads is None and not limitPowComplexity:
        return
    if threads is not None:
        message = "The threads kwarg is deprecated. Use procs."
    else:
        message = "The limitPowComplexity kwarg is deprecated. Use constraints."
    raise ValueError(message)
558
 
559
 
560
  def run_feature_selection(X, y, select_k_features):