MilesCranmer committed on
Commit
e47833c
·
1 Parent(s): bfd7114

Enable custom cluster managers

Browse files
Files changed (2) hide show
  1. pysr/sr.py +22 -3
  2. pysr/version.py +1 -1
pysr/sr.py CHANGED
@@ -333,12 +333,18 @@ def init_julia():
333
 
334
 
335
  def _add_sr_to_julia_project(Main, io_arg):
336
- Main.spec = Main.PackageSpec(
337
  name="SymbolicRegression",
338
  url="https://github.com/MilesCranmer/SymbolicRegression.jl",
339
  rev="v" + __symbolic_regression_jl_version__,
340
  )
341
- Main.eval(f"Pkg.add(spec, {io_arg})")
 
 
 
 
 
 
342
 
343
 
344
  class PySRRegressor(BaseEstimator, RegressorMixin):
@@ -411,6 +417,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
411
  Xresampled=None,
412
  precision=32,
413
  multithreading=None,
 
414
  use_symbolic_utils=False,
415
  skip_mutation_failures=True,
416
  # To support deprecated kwargs:
@@ -444,6 +451,11 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
444
  :type procs: int
445
  :param multithreading: Use multithreading instead of distributed backend. Default is yes. Using procs=0 will turn off both.
446
  :type multithreading: bool
 
 
 
 
 
447
  :param batching: whether to compare population members on small batches during evolution. Still uses full dataset for comparing against hall of fame.
448
  :type batching: bool
449
  :param batch_size: the amount of data to use if doing batching.
@@ -624,7 +636,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
624
  if multithreading is None:
625
  # Default is multithreading=True, unless explicitly set,
626
  # or procs is set to 0 (serial mode).
627
- multithreading = procs != 0
628
  if update_verbosity is None:
629
  update_verbosity = verbosity
630
 
@@ -734,6 +746,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
734
  Xresampled=Xresampled,
735
  precision=precision,
736
  multithreading=multithreading,
 
737
  use_symbolic_utils=use_symbolic_utils,
738
  skip_mutation_failures=skip_mutation_failures,
739
  ),
@@ -1034,6 +1047,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
1034
  )
1035
 
1036
  multithreading = self.params["multithreading"]
 
1037
  procs = self.params["procs"]
1038
  binary_operators = self.params["binary_operators"]
1039
  unary_operators = self.params["unary_operators"]
@@ -1060,6 +1074,10 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
1060
 
1061
  Main = init_julia()
1062
 
 
 
 
 
1063
  if isinstance(X, pd.DataFrame):
1064
  if variable_names is not None:
1065
  warnings.warn("Resetting variable_names from X.columns")
@@ -1332,6 +1350,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
1332
  numprocs=int(cprocs),
1333
  multithreading=bool(multithreading),
1334
  saved_state=self.raw_julia_state,
 
1335
  )
1336
 
1337
  self.variable_names = variable_names
 
333
 
334
 
335
  def _add_sr_to_julia_project(Main, io_arg):
336
+ Main.sr_spec = Main.PackageSpec(
337
  name="SymbolicRegression",
338
  url="https://github.com/MilesCranmer/SymbolicRegression.jl",
339
  rev="v" + __symbolic_regression_jl_version__,
340
  )
341
+ Main.eval(f"Pkg.add(sr_spec, {io_arg})")
342
+ Main.clustermanagers_spec = Main.PackageSpec(
343
+ name="ClusterManagers",
344
+ url="https://github.com/JuliaParallel/ClusterManagers.jl",
345
+ rev="14e7302f068794099344d5d93f71979aaf4fbeb3",
346
+ )
347
+ Main.eval(f"Pkg.add(clustermanagers_spec, {io_arg})")
348
 
349
 
350
  class PySRRegressor(BaseEstimator, RegressorMixin):
 
417
  Xresampled=None,
418
  precision=32,
419
  multithreading=None,
420
+ cluster_manager=None,
421
  use_symbolic_utils=False,
422
  skip_mutation_failures=True,
423
  # To support deprecated kwargs:
 
451
  :type procs: int
452
  :param multithreading: Use multithreading instead of distributed backend. Default is yes. Using procs=0 will turn off both.
453
  :type multithreading: bool
454
+ :param cluster_manager: For distributed computing, this sets the job queue
455
+ system. Set to one of "slurm", "pbs", "lsf", "sge", "qrsh", "scyld", or "htc".
456
+ If set to one of these, PySR will run in distributed mode, and use `procs` to figure
457
+ out how many processes to launch.
458
+ :type cluster_manager: str
459
  :param batching: whether to compare population members on small batches during evolution. Still uses full dataset for comparing against hall of fame.
460
  :type batching: bool
461
  :param batch_size: the amount of data to use if doing batching.
 
636
  if multithreading is None:
637
  # Default is multithreading=True, unless explicitly set,
638
  # or procs is set to 0 (serial mode).
639
+ multithreading = procs != 0 and cluster_manager is None
640
  if update_verbosity is None:
641
  update_verbosity = verbosity
642
 
 
746
  Xresampled=Xresampled,
747
  precision=precision,
748
  multithreading=multithreading,
749
+ cluster_manager=cluster_manager,
750
  use_symbolic_utils=use_symbolic_utils,
751
  skip_mutation_failures=skip_mutation_failures,
752
  ),
 
1047
  )
1048
 
1049
  multithreading = self.params["multithreading"]
1050
+ cluster_manager = self.params["cluster_manager"]
1051
  procs = self.params["procs"]
1052
  binary_operators = self.params["binary_operators"]
1053
  unary_operators = self.params["unary_operators"]
 
1074
 
1075
  Main = init_julia()
1076
 
1077
+ if cluster_manager is not None:
1078
+ Main.eval(f"import ClusterManagers: addprocs_{cluster_manager}")
1079
+ cluster_manager = Main.eval(f"addprocs_{cluster_manager}")
1080
+
1081
  if isinstance(X, pd.DataFrame):
1082
  if variable_names is not None:
1083
  warnings.warn("Resetting variable_names from X.columns")
 
1350
  numprocs=int(cprocs),
1351
  multithreading=bool(multithreading),
1352
  saved_state=self.raw_julia_state,
1353
+ addprocs_function=cluster_manager,
1354
  )
1355
 
1356
  self.variable_names = variable_names
pysr/version.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = "0.8.0"
2
  __symbolic_regression_jl_version__ = "0.8.7"
 
1
+ __version__ = "0.8.1"
2
  __symbolic_regression_jl_version__ = "0.8.7"