Spaces:
Running
Running
Merge pull request #2 from DhananjayAshok/refactoring
Browse files
Refactoring of sr.py and integration with Miles's changes last night
- pysr/sr.py +252 -182
pysr/sr.py
CHANGED
@@ -191,15 +191,9 @@ def pysr(X=None, y=None, weights=None,
|
|
191 |
(as strings).
|
192 |
|
193 |
"""
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
raise ValueError("The limitPowComplexity kwarg is deprecated. Use constraints.")
|
198 |
-
if maxdepth is None:
|
199 |
-
maxdepth = maxsize
|
200 |
-
if equation_file is None:
|
201 |
-
date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
|
202 |
-
equation_file = 'hall_of_fame_' + date_time + '.csv'
|
203 |
|
204 |
if isinstance(X, pd.DataFrame):
|
205 |
variable_names = list(X.columns)
|
@@ -210,128 +204,144 @@ def pysr(X=None, y=None, weights=None,
|
|
210 |
if len(X.shape) == 1:
|
211 |
X = X[:, None]
|
212 |
|
213 |
-
|
214 |
-
assert len(unary_operators) + len(binary_operators) > 0
|
215 |
-
assert len(X.shape) == 2
|
216 |
-
assert len(y.shape) == 1
|
217 |
-
assert X.shape[0] == y.shape[0]
|
218 |
-
if weights is not None:
|
219 |
-
assert len(weights.shape) == 1
|
220 |
-
assert X.shape[0] == weights.shape[0]
|
221 |
-
if use_custom_variable_names:
|
222 |
-
assert len(variable_names) == X.shape[1]
|
223 |
|
224 |
-
|
225 |
-
selection = run_feature_selection(X, y, select_k_features)
|
226 |
-
print(f"Using features {selection}")
|
227 |
-
X = X[:, selection]
|
228 |
-
|
229 |
-
if use_custom_variable_names:
|
230 |
-
variable_names = [variable_names[selection[i]] for i in range(len(selection))]
|
231 |
|
|
|
|
|
|
|
|
|
|
|
232 |
if populations is None:
|
233 |
populations = procs
|
234 |
-
|
235 |
-
|
236 |
-
if isinstance(unary_operators, str):
|
237 |
-
|
238 |
if X is None:
|
239 |
-
|
240 |
-
eval_str = "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5"
|
241 |
-
elif test == 'simple2':
|
242 |
-
eval_str = "np.sign(X[:, 2])*np.abs(X[:, 2])**3.5 + 1/(np.abs(X[:, 0])+1)"
|
243 |
-
elif test == 'simple3':
|
244 |
-
eval_str = "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)"
|
245 |
-
elif test == 'simple4':
|
246 |
-
eval_str = "1.0 + 3*X[:, 0]**2 - 0.5*X[:, 0]**3 + 0.1*X[:, 0]**4"
|
247 |
-
elif test == 'simple5':
|
248 |
-
eval_str = "(np.exp(X[:, 3]) + 3)/(np.abs(X[:, 1]) + np.cos(X[:, 0]) + 1.1)"
|
249 |
-
|
250 |
-
X = np.random.randn(100, 5)*3
|
251 |
-
y = eval(eval_str)
|
252 |
-
print("Running on", eval_str)
|
253 |
-
|
254 |
-
# System-independent paths
|
255 |
-
pkg_directory = Path(__file__).parents[1] / 'julia'
|
256 |
-
pkg_filename = pkg_directory / "sr.jl"
|
257 |
-
operator_filename = pkg_directory / "Operators.jl"
|
258 |
-
julia_auxiliaries = [
|
259 |
-
"Equation.jl", "ProgramConstants.jl",
|
260 |
-
"LossFunctions.jl", "Utils.jl", "EvaluateEquation.jl",
|
261 |
-
"MutationFunctions.jl", "SimplifyEquation.jl", "PopMember.jl",
|
262 |
-
"HallOfFame.jl", "CheckConstraints.jl", "Mutate.jl",
|
263 |
-
"Population.jl", "RegularizedEvolution.jl", "SingleIteration.jl",
|
264 |
-
"ConstantOptimization.jl"
|
265 |
-
]
|
266 |
-
julia_auxiliary_filenames = [
|
267 |
-
pkg_directory / fname
|
268 |
-
for fname in julia_auxiliaries
|
269 |
-
]
|
270 |
-
|
271 |
-
tmpdir = Path(tempfile.mkdtemp(dir=tempdir))
|
272 |
-
hyperparam_filename = tmpdir / f'hyperparams.jl'
|
273 |
-
dataset_filename = tmpdir / f'dataset.jl'
|
274 |
-
auxiliary_filename = tmpdir / f'auxiliary.jl'
|
275 |
-
runfile_filename = tmpdir / f'runfile.jl'
|
276 |
-
X_filename = tmpdir / "X.csv"
|
277 |
-
y_filename = tmpdir / "y.csv"
|
278 |
-
weights_filename = tmpdir / "weights.csv"
|
279 |
|
280 |
def_hyperparams = ""
|
281 |
|
282 |
# Add pre-defined functions to Julia
|
283 |
-
|
284 |
-
for i in range(len(op_list)):
|
285 |
-
op = op_list[i]
|
286 |
-
is_user_defined_operator = '(' in op
|
287 |
|
288 |
-
if is_user_defined_operator:
|
289 |
-
def_hyperparams += op + "\n"
|
290 |
-
# Cut off from the first non-alphanumeric char:
|
291 |
-
first_non_char = [
|
292 |
-
j for j in range(len(op))
|
293 |
-
if not (op[j].isalpha() or op[j].isdigit())][0]
|
294 |
-
function_name = op[:first_non_char]
|
295 |
-
op_list[i] = function_name
|
296 |
|
297 |
#arbitrary complexity by default
|
298 |
-
|
299 |
-
if op not in constraints:
|
300 |
-
constraints[op] = -1
|
301 |
-
for op in binary_operators:
|
302 |
-
if op not in constraints:
|
303 |
-
constraints[op] = (-1, -1)
|
304 |
-
if op in ['plus', 'sub']:
|
305 |
-
if constraints[op][0] != constraints[op][1]:
|
306 |
-
raise NotImplementedError("You need equal constraints on both sides for - and *, due to simplification strategies.")
|
307 |
-
elif op == 'mult':
|
308 |
-
# Make sure the complex expression is in the left side.
|
309 |
-
if constraints[op][0] == -1:
|
310 |
-
continue
|
311 |
-
elif constraints[op][1] == -1 or constraints[op][0] < constraints[op][1]:
|
312 |
-
constraints[op][0], constraints[op][1] = constraints[op][1], constraints[op][0]
|
313 |
|
314 |
-
constraints_str =
|
315 |
-
first = True
|
316 |
-
for op in unary_operators:
|
317 |
-
val = constraints[op]
|
318 |
-
if not first:
|
319 |
-
constraints_str += ", "
|
320 |
-
constraints_str += f"{val:d}"
|
321 |
-
first = False
|
322 |
|
323 |
-
constraints_str += """]
|
324 |
-
const bin_constraints = ["""
|
325 |
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
|
|
|
|
|
|
|
|
334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
|
336 |
{constraints_str}
|
337 |
const binops = {'[' + ', '.join(binary_operators) + ']'}
|
@@ -370,7 +380,6 @@ const warmupMaxsize = {warmupMaxsize:d}
|
|
370 |
const limitPowComplexity = {"true" if limitPowComplexity else "false"}
|
371 |
const useFrequency = {"true" if useFrequency else "false"}
|
372 |
"""
|
373 |
-
|
374 |
op_runner = ""
|
375 |
if len(binary_operators) > 0:
|
376 |
op_runner += """
|
@@ -381,14 +390,13 @@ const useFrequency = {"true" if useFrequency else "false"}
|
|
381 |
end"""
|
382 |
for i in range(1, len(binary_operators)):
|
383 |
op_runner += f"""
|
384 |
-
elseif i === {i+1}
|
385 |
@inbounds @simd for j=1:clen
|
386 |
x[j] = {binary_operators[i]}(x[j], y[j])
|
387 |
end"""
|
388 |
op_runner += """
|
389 |
end
|
390 |
end"""
|
391 |
-
|
392 |
if len(unary_operators) > 0:
|
393 |
op_runner += """
|
394 |
@inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
|
@@ -398,93 +406,155 @@ end"""
|
|
398 |
end"""
|
399 |
for i in range(1, len(unary_operators)):
|
400 |
op_runner += f"""
|
401 |
-
elseif i === {i+1}
|
402 |
@inbounds @simd for j=1:clen
|
403 |
x[j] = {unary_operators[i]}(x[j])
|
404 |
end"""
|
405 |
op_runner += """
|
406 |
end
|
407 |
end"""
|
408 |
-
|
409 |
def_hyperparams += op_runner
|
|
|
|
|
|
|
|
|
410 |
|
411 |
-
def_auxiliary = '\n'.join([
|
412 |
-
f"""include("{_escape_filename(aux_fname)}")""" for aux_fname in julia_auxiliary_filenames
|
413 |
-
])
|
414 |
-
|
415 |
-
def_datasets = """using DelimitedFiles"""
|
416 |
-
|
417 |
-
np.savetxt(X_filename, X, delimiter=',')
|
418 |
-
np.savetxt(y_filename, y, delimiter=',')
|
419 |
-
if weights is not None:
|
420 |
-
np.savetxt(weights_filename, weights, delimiter=',')
|
421 |
|
422 |
-
|
423 |
-
|
424 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
|
426 |
-
if weights is not None:
|
427 |
-
def_datasets += f"""
|
428 |
-
const weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')"""
|
429 |
|
430 |
-
|
431 |
-
|
432 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
433 |
|
434 |
-
with open(hyperparam_filename, 'w') as f:
|
435 |
-
print(def_hyperparams, file=f)
|
436 |
|
437 |
-
|
438 |
-
|
|
|
|
|
|
|
439 |
|
440 |
-
|
441 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
442 |
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
print(f'@everywhere include("{_escape_filename(auxiliary_filename)}")', file=f)
|
447 |
-
print(f'@everywhere include("{_escape_filename(pkg_filename)}")', file=f)
|
448 |
-
print(f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})', file=f)
|
449 |
-
print(f'rmprocs(nprocs)', file=f)
|
450 |
|
451 |
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
459 |
|
460 |
-
global global_n_features
|
461 |
-
global global_equation_file
|
462 |
-
global global_variable_names
|
463 |
-
global global_extra_sympy_mappings
|
464 |
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
469 |
|
470 |
-
print("Running on", ' '.join(command))
|
471 |
-
process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1, shell=True)
|
472 |
-
try:
|
473 |
-
while True:
|
474 |
-
line = process.stdout.readline()
|
475 |
-
if not line: break
|
476 |
-
print(line.decode('utf-8').replace('\n', ''))
|
477 |
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
483 |
|
484 |
-
if delete_tempfiles:
|
485 |
-
shutil.rmtree(tmpdir)
|
486 |
|
487 |
-
|
|
|
|
|
|
|
|
|
488 |
|
489 |
|
490 |
def run_feature_selection(X, y, select_k_features):
|
|
|
191 |
(as strings).
|
192 |
|
193 |
"""
|
194 |
+
raise_depreciation_errors(limitPowComplexity, threads)
|
195 |
+
auxiliary_filename, X_filename, dataset_filename, hyperparam_filename, julia_auxiliary_filenames, operator_filename \
|
196 |
+
,pkg_filename, runfile_filename, tmpdir, weights_filename, y_filename = set_paths(tempdir)
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
if isinstance(X, pd.DataFrame):
|
199 |
variable_names = list(X.columns)
|
|
|
204 |
if len(X.shape) == 1:
|
205 |
X = X[:, None]
|
206 |
|
207 |
+
check_assertions(X, binary_operators, unary_operators, use_custom_variable_names, variable_names, weights, y)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
|
209 |
+
X, variable_names = handle_feature_selection(X, select_k_features, use_custom_variable_names, variable_names, y)
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
+
if maxdepth is None:
|
212 |
+
maxdepth = maxsize
|
213 |
+
if equation_file is None:
|
214 |
+
date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
|
215 |
+
equation_file = 'hall_of_fame_' + date_time + '.csv'
|
216 |
if populations is None:
|
217 |
populations = procs
|
218 |
+
if isinstance(binary_operators, str):
|
219 |
+
binary_operators = [binary_operators]
|
220 |
+
if isinstance(unary_operators, str):
|
221 |
+
unary_operators = [unary_operators]
|
222 |
if X is None:
|
223 |
+
X, y = using_test_input(X, test, y)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
|
225 |
def_hyperparams = ""
|
226 |
|
227 |
# Add pre-defined functions to Julia
|
228 |
+
def_hyperparams = predefined_function_addition(binary_operators, def_hyperparams, unary_operators)
|
|
|
|
|
|
|
229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
|
231 |
#arbitrary complexity by default
|
232 |
+
handle_constraints(binary_operators, constraints, unary_operators)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
|
234 |
+
constraints_str = make_constraints_str(binary_operators, constraints, unary_operators)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
|
|
|
|
|
236 |
|
237 |
+
def_hyperparams = make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators,
|
238 |
+
constraints_str, def_hyperparams, equation_file, fast_cycle,
|
239 |
+
fractionReplacedHof, hofMigration, limitPowComplexity, maxdepth,
|
240 |
+
maxsize, migration, nrestarts, operator_filename, parsimony,
|
241 |
+
perturbationFactor, populations, procs, shouldOptimizeConstants,
|
242 |
+
unary_operators, useFrequency, use_custom_variable_names, variable_names,
|
243 |
+
warmupMaxsize, weightAddNode, weightDeleteNode, weightDoNothing,
|
244 |
+
weightInsertNode, weightMutateConstant, weightMutateOperator,
|
245 |
+
weightRandomize, weightSimplify, weights)
|
246 |
+
def_auxiliary = make_auxiliary_julia_str(julia_auxiliary_filenames)
|
247 |
+
|
248 |
+
def_datasets = make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename)
|
249 |
|
250 |
+
create_julia_files(auxiliary_filename, dataset_filename, def_auxiliary, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
|
251 |
+
ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity)
|
252 |
+
|
253 |
+
final_pysr_process(julia_optimization, procs, runfile_filename, timeout)
|
254 |
+
|
255 |
+
set_globals(X, equation_file, extra_sympy_mappings, variable_names)
|
256 |
+
|
257 |
+
if delete_tempfiles:
|
258 |
+
shutil.rmtree(tmpdir)
|
259 |
+
|
260 |
+
return get_hof()
|
261 |
+
|
262 |
+
|
263 |
+
def make_auxiliary_julia_str(julia_auxiliary_filenames):
    """Build the Julia source text that includes every auxiliary file.

    Args:
        julia_auxiliary_filenames: iterable of paths to the auxiliary
            Julia files.

    Returns:
        A string with one ``include("...")`` statement per path, separated
        by newlines. Each path is passed through ``_escape_filename``
        before being embedded.
    """
    include_statements = []
    for aux_fname in julia_auxiliary_filenames:
        include_statements.append(f'include("{_escape_filename(aux_fname)}")')
    return '\n'.join(include_statements)
|
268 |
+
|
269 |
+
|
270 |
+
def set_globals(X, equation_file, extra_sympy_mappings, variable_names):
    """Store details of the current run in module-level globals.

    Args:
        X: feature matrix; only ``X.shape[1]`` (the column count) is read.
        equation_file: stored as ``global_equation_file``.
        extra_sympy_mappings: stored as ``global_extra_sympy_mappings``.
        variable_names: stored as ``global_variable_names``.
    """
    global global_n_features, global_equation_file
    global global_variable_names, global_extra_sympy_mappings

    n_features = X.shape[1]
    (global_n_features,
     global_equation_file,
     global_variable_names,
     global_extra_sympy_mappings) = (n_features,
                                     equation_file,
                                     variable_names,
                                     extra_sympy_mappings)
|
279 |
+
|
280 |
+
|
281 |
+
def final_pysr_process(julia_optimization, procs, runfile_filename, timeout):
    """Launch Julia on the generated run file and echo its stdout.

    Args:
        julia_optimization: integer optimization level, passed as ``-O<n>``.
        procs: number of worker processes, passed as ``-p <procs>``.
        runfile_filename: path of the Julia script to execute.
        timeout: if not ``None``, the command is prefixed with
            ``timeout <timeout>``.

    The child's stdout is printed line by line until it closes, then the
    process is waited on. A ``KeyboardInterrupt`` while reading kills the
    child instead of propagating.
    """
    # Local import: only needed to choose the platform-specific shell flag.
    import os

    command = [
        'julia', f'-O{julia_optimization:d}',
        '-p', f'{procs}',
        str(runfile_filename),
    ]
    if timeout is not None:
        command = ['timeout', f'{timeout}'] + command
    print("Running on", ' '.join(command))

    # With shell=True and a *list* of arguments, POSIX runs only the first
    # item as the shell command and hands the rest to the shell itself, so
    # Julia would start without its flags or the run file. Only Windows
    # (where the list is joined into one command line) keeps the shell.
    use_shell = os.name == 'nt'
    process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1, shell=use_shell)
    try:
        while True:
            line = process.stdout.readline()
            if not line:
                break
            print(line.decode('utf-8').replace('\n', ''))

        process.stdout.close()
        process.wait()
    except KeyboardInterrupt:
        print("Killing process... will return when done.")
        process.kill()
|
302 |
+
|
303 |
+
|
304 |
+
def create_julia_files(auxiliary_filename, dataset_filename, def_auxiliary, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
                       ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity):
    """Write the generated Julia sources and the run script to disk.

    Writes ``def_hyperparams``, ``def_datasets`` and ``def_auxiliary`` to
    their respective files, then writes the run file. The run file includes
    the hyperparameter, dataset, auxiliary and package files on every
    worker (``@everywhere``), calls ``fullRun(...)`` with the search
    settings, and finishes with ``rmprocs(nprocs)``.
    """
    generated_sources = (
        (hyperparam_filename, def_hyperparams),
        (dataset_filename, def_datasets),
        (auxiliary_filename, def_auxiliary),
    )
    for target, contents in generated_sources:
        with open(target, 'w') as out:
            print(contents, file=out)

    included_files = (hyperparam_filename, dataset_filename, auxiliary_filename, pkg_filename)
    with open(runfile_filename, 'w') as out:
        for included in included_files:
            print(f'@everywhere include("{_escape_filename(included)}")', file=out)
        run_call = f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})'
        print(run_call, file=out)
        print('rmprocs(nprocs)', file=out)
|
321 |
+
|
322 |
+
|
323 |
+
def make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename):
    """Save the dataset as CSV files and return the Julia code that loads them.

    ``X`` and ``y`` (and ``weights`` when it is not ``None``) are written
    with ``np.savetxt`` using a comma delimiter. The returned Julia snippet
    starts with ``using DelimitedFiles`` and declares ``const X``,
    ``const y`` and, when weights were given, ``const weights`` via
    ``readdlm`` as ``Float32``.
    """
    has_weights = weights is not None

    np.savetxt(X_filename, X, delimiter=',')
    np.savetxt(y_filename, y, delimiter=',')
    if has_weights:
        np.savetxt(weights_filename, weights, delimiter=',')

    def readdlm_statement(julia_name, csv_path):
        # '\\n' emits a literal backslash-n so Julia sees the '\n' char literal.
        return f"""const {julia_name} = readdlm("{_escape_filename(csv_path)}", ',', Float32, '\\n')"""

    statements = [
        "using DelimitedFiles",
        readdlm_statement("X", X_filename),
        readdlm_statement("y", y_filename),
    ]
    if has_weights:
        statements.append(readdlm_statement("weights", weights_filename))
    return "\n".join(statements)
|
336 |
+
|
337 |
+
|
338 |
+
def make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators, constraints_str,
|
339 |
+
def_hyperparams, equation_file, fast_cycle, fractionReplacedHof, hofMigration,
|
340 |
+
limitPowComplexity, maxdepth, maxsize, migration, nrestarts, operator_filename,
|
341 |
+
parsimony, perturbationFactor, populations, procs, shouldOptimizeConstants,
|
342 |
+
unary_operators, useFrequency, use_custom_variable_names, variable_names, warmupMaxsize, weightAddNode,
|
343 |
+
weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
|
344 |
+
weightMutateOperator, weightRandomize, weightSimplify, weights):
|
345 |
def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
|
346 |
{constraints_str}
|
347 |
const binops = {'[' + ', '.join(binary_operators) + ']'}
|
|
|
380 |
const limitPowComplexity = {"true" if limitPowComplexity else "false"}
|
381 |
const useFrequency = {"true" if useFrequency else "false"}
|
382 |
"""
|
|
|
383 |
op_runner = ""
|
384 |
if len(binary_operators) > 0:
|
385 |
op_runner += """
|
|
|
390 |
end"""
|
391 |
for i in range(1, len(binary_operators)):
|
392 |
op_runner += f"""
|
393 |
+
elseif i === {i + 1}
|
394 |
@inbounds @simd for j=1:clen
|
395 |
x[j] = {binary_operators[i]}(x[j], y[j])
|
396 |
end"""
|
397 |
op_runner += """
|
398 |
end
|
399 |
end"""
|
|
|
400 |
if len(unary_operators) > 0:
|
401 |
op_runner += """
|
402 |
@inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
|
|
|
406 |
end"""
|
407 |
for i in range(1, len(unary_operators)):
|
408 |
op_runner += f"""
|
409 |
+
elseif i === {i + 1}
|
410 |
@inbounds @simd for j=1:clen
|
411 |
x[j] = {unary_operators[i]}(x[j])
|
412 |
end"""
|
413 |
op_runner += """
|
414 |
end
|
415 |
end"""
|
|
|
416 |
def_hyperparams += op_runner
|
417 |
+
if use_custom_variable_names:
|
418 |
+
def_hyperparams += f"""
|
419 |
+
const varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
|
420 |
+
return def_hyperparams
|
421 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
422 |
|
423 |
+
def make_constraints_str(binary_operators, constraints, unary_operators):
    """Render the operator constraints as Julia constant declarations.

    Args:
        binary_operators: operator names; each must map in ``constraints``
            to a pair of integers.
        constraints: mapping from operator name to its constraint.
        unary_operators: operator names; each must map in ``constraints``
            to an integer.

    Returns:
        Two lines of Julia: ``const una_constraints = [...]`` listing the
        unary values, and ``const bin_constraints = [...]`` listing the
        binary pairs as ``(a, b)`` tuples, both in operator order.

    Raises:
        KeyError: if an operator has no entry in ``constraints``.
    """
    # The ":d" format spec keeps the original strictness: non-integers raise.
    unary_part = ", ".join(f"{constraints[op]:d}" for op in unary_operators)
    binary_part = ", ".join(
        f"({constraints[op][0]:d}, {constraints[op][1]:d})"
        for op in binary_operators
    )
    return (
        f"const una_constraints = [{unary_part}]\n"
        f"const bin_constraints = [{binary_part}]"
    )
|
443 |
|
|
|
|
|
|
|
444 |
|
445 |
+
def handle_constraints(binary_operators, constraints, unary_operators):
    """Fill in default constraints and normalise them, in place.

    Args:
        binary_operators: names of the binary operators in use.
        constraints: dict mapping operator name to its constraint; mutated.
            Unary operators missing from it get ``-1``; binary operators
            missing from it get ``(-1, -1)``.
        unary_operators: names of the unary operators in use.

    Raises:
        NotImplementedError: if ``plus`` or ``sub`` has different
            constraints on its two sides.

    For ``mult`` the pair is reordered so the larger (or unbounded, ``-1``)
    side comes first.
    """
    for op in unary_operators:
        constraints.setdefault(op, -1)

    for op in binary_operators:
        constraints.setdefault(op, (-1, -1))

        if op in ('plus', 'sub'):
            if constraints[op][0] != constraints[op][1]:
                raise NotImplementedError(
                    "You need equal constraints on both sides for + and -, due to simplification strategies.")
        elif op == 'mult':
            left, right = constraints[op][0], constraints[op][1]
            # Make sure the complex expression is in the left side.
            if left == -1:
                continue
            if right == -1 or left < right:
                # Rebuild the pair instead of assigning to its items:
                # constraints are normally tuples, which are immutable.
                constraints[op] = (right, left)
|
462 |
|
|
|
|
|
463 |
|
464 |
+
def predefined_function_addition(binary_operators, def_hyperparams, unary_operators):
    """Register user-defined operators and reduce them to bare names.

    An operator string containing ``(`` is treated as an inline Julia
    function definition (for example ``"sq(x) = x^2"``). Its full text is
    appended to ``def_hyperparams`` followed by a newline, and its entry in
    the operator list is replaced, in place, by the function name alone.

    Args:
        binary_operators: list of operator strings; mutated in place.
        def_hyperparams: Julia source accumulated so far.
        unary_operators: list of operator strings; mutated in place.

    Returns:
        ``def_hyperparams`` with the user-defined definitions appended.
    """
    for op_list in (binary_operators, unary_operators):
        for i, op in enumerate(op_list):
            is_user_defined_operator = '(' in op
            if not is_user_defined_operator:
                continue

            def_hyperparams += op + "\n"
            # The name ends at the first character that cannot be part of an
            # identifier. Underscores are allowed so "my_op(x) = ..." yields
            # "my_op" rather than "my". A match always exists because the
            # string contains "(".
            name_end = next(
                j for j, char in enumerate(op)
                if not (char.isalpha() or char.isdigit() or char == '_')
            )
            op_list[i] = op[:name_end]
    return def_hyperparams
|
479 |
+
|
480 |
+
|
481 |
+
def using_test_input(X, test, y):
    """Generate a synthetic dataset for one of the built-in test problems.

    Args:
        X: ignored; replaced by freshly generated random features.
        test: name of the test problem, ``'simple1'`` to ``'simple5'``.
        y: ignored; replaced by the evaluated target.

    Returns:
        ``(X, y)`` where ``X`` is a ``(100, 5)`` array drawn from
        ``np.random.randn`` and scaled by 3, and ``y`` is the selected
        expression evaluated on ``X``.

    Raises:
        ValueError: if ``test`` is not one of the known test names.
    """
    test_expressions = {
        'simple1': "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5",
        'simple2': "np.sign(X[:, 2])*np.abs(X[:, 2])**3.5 + 1/(np.abs(X[:, 0])+1)",
        'simple3': "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)",
        'simple4': "1.0 + 3*X[:, 0]**2 - 0.5*X[:, 0]**3 + 0.1*X[:, 0]**4",
        'simple5': "(np.exp(X[:, 3]) + 3)/(np.abs(X[:, 1]) + np.cos(X[:, 0]) + 1.1)",
    }
    try:
        eval_str = test_expressions[test]
    except (KeyError, TypeError):
        # Previously an unknown name fell through every branch and failed
        # later with an unbound-variable error.
        raise ValueError(
            f"Unknown test {test!r}; expected one of {sorted(test_expressions)}"
        ) from None

    X = np.random.randn(100, 5) * 3
    # eval is applied only to the fixed expressions above, never to caller
    # input; the namespaces are explicit so nothing else leaks in.
    y = eval(eval_str, {"np": np}, {"X": X})
    print("Running on", eval_str)
    return X, y
|
496 |
+
|
497 |
+
|
498 |
+
def handle_feature_selection(X, select_k_features, use_custom_variable_names, variable_names, y):
    """Optionally reduce ``X`` (and its variable names) to selected features.

    Args:
        X: feature matrix, indexed as ``X[:, selection]``.
        select_k_features: passed to ``run_feature_selection``; ``None``
            disables selection.
        use_custom_variable_names: when true, ``variable_names`` is
            filtered to match the selected columns.
        variable_names: names of the columns of ``X``.
        y: target values, passed to ``run_feature_selection``.

    Returns:
        ``(X, variable_names)``, unchanged when ``select_k_features`` is
        ``None``.
    """
    if select_k_features is None:
        return X, variable_names

    selection = run_feature_selection(X, y, select_k_features)
    print(f"Using features {selection}")
    X = X[:, selection]

    # Name filtering only happens when a selection was made; ``selection``
    # does not exist otherwise.
    if use_custom_variable_names:
        variable_names = [variable_names[index] for index in selection]
    return X, variable_names
|
|
|
|
|
|
|
|
|
507 |
|
508 |
|
509 |
+
def set_paths(tempdir):
    """Compute every file path used for a run and create its temp directory.

    Args:
        tempdir: parent directory for the new temporary directory, passed
            to ``tempfile.mkdtemp`` as ``dir``.

    Returns:
        An 11-tuple, in this order: auxiliary file, X CSV, dataset file,
        hyperparameter file, list of auxiliary Julia source paths,
        operators file, package file, run file, the temporary directory,
        weights CSV, y CSV.
    """
    # System-independent paths
    julia_dir = Path(__file__).parents[1] / 'julia'
    auxiliary_sources = (
        "Equation.jl", "ProgramConstants.jl",
        "LossFunctions.jl", "Utils.jl", "EvaluateEquation.jl",
        "MutationFunctions.jl", "SimplifyEquation.jl", "PopMember.jl",
        "HallOfFame.jl", "CheckConstraints.jl", "Mutate.jl",
        "Population.jl", "RegularizedEvolution.jl", "SingleIteration.jl",
        "ConstantOptimization.jl",
    )
    julia_auxiliary_filenames = [julia_dir / source for source in auxiliary_sources]

    tmpdir = Path(tempfile.mkdtemp(dir=tempdir))
    return (
        tmpdir / 'auxiliary.jl',
        tmpdir / "X.csv",
        tmpdir / 'dataset.jl',
        tmpdir / 'hyperparams.jl',
        julia_auxiliary_filenames,
        julia_dir / "Operators.jl",
        julia_dir / "sr.jl",
        tmpdir / 'runfile.jl',
        tmpdir,
        tmpdir / "weights.csv",
        tmpdir / "y.csv",
    )
|
538 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
539 |
|
540 |
+
def check_assertions(X, binary_operators, unary_operators, use_custom_variable_names, variable_names, weights, y):
    """Validate the inputs before any work is done.

    Checks that at least one operator is given, that ``X`` is
    2-dimensional and ``y`` 1-dimensional with matching first dimension,
    that ``weights`` (when not ``None``) is 1-dimensional with one entry
    per row of ``X``, and that custom variable names (when enabled) match
    the number of columns of ``X``.

    Raises:
        AssertionError: if any check fails. The checks raise explicitly
            rather than using ``assert`` so they still run under
            ``python -O``.
    """
    def _require(condition, message):
        # Same exception type as a failed assert, but not stripped by -O.
        if not condition:
            raise AssertionError(message)

    # Check for potential errors before they happen
    _require(len(unary_operators) + len(binary_operators) > 0,
             "At least one unary or binary operator is required.")
    _require(len(X.shape) == 2, "X must be 2-dimensional.")
    _require(len(y.shape) == 1, "y must be 1-dimensional.")
    _require(X.shape[0] == y.shape[0],
             "X and y must have the same number of rows.")
    if weights is not None:
        _require(len(weights.shape) == 1, "weights must be 1-dimensional.")
        _require(X.shape[0] == weights.shape[0],
                 "weights must have one entry per row of X.")
    if use_custom_variable_names:
        _require(len(variable_names) == X.shape[1],
                 "variable_names must have one entry per column of X.")
|
551 |
|
|
|
|
|
552 |
|
553 |
+
def raise_depreciation_errors(limitPowComplexity, threads):
    """Reject keyword arguments that are no longer supported.

    Args:
        limitPowComplexity: deprecated flag; any truthy value is rejected.
        threads: deprecated setting; any value other than ``None`` is
            rejected.

    Raises:
        ValueError: naming the deprecated kwarg and its replacement. The
            ``threads`` check runs first.
    """
    deprecated_kwargs = (
        (threads is not None, "The threads kwarg is deprecated. Use procs."),
        (limitPowComplexity, "The limitPowComplexity kwarg is deprecated. Use constraints."),
    )
    for in_use, message in deprecated_kwargs:
        if in_use:
            raise ValueError(message)
|
558 |
|
559 |
|
560 |
def run_feature_selection(X, y, select_k_features):
|