MilesCranmer committed on
Commit
bd5fc10
·
unverified ·
2 Parent(s): 228d437 3ff33b4

Merge pull request #398 from MilesCranmer/use-display-variables

Browse files
Files changed (2) hide show
  1. pysr/sr.py +13 -35
  2. pysr/version.py +2 -2
pysr/sr.py CHANGED
@@ -572,7 +572,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
572
  Default is `False`.
573
  verbosity : int
574
  What verbosity level to use. 0 means minimal print statements.
575
- Default is `1e9`.
576
  update_verbosity : int
577
  What verbosity level to use for package updates.
578
  Will take value of `verbosity` if not given.
@@ -661,7 +661,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
661
  feature_names_in_ : ndarray of shape (`n_features_in_`,)
662
  Names of features seen during :term:`fit`. Defined only when `X`
663
  has feature names that are all strings.
664
- pretty_feature_names_in_ : ndarray of shape (`n_features_in_`,)
665
  Pretty names of features, used only during printing.
666
  X_units_ : list[str] of length n_features
667
  Units of each variable in the training dataset, `X`.
@@ -791,7 +791,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
791
  random_state=None,
792
  deterministic=False,
793
  warm_start=False,
794
- verbosity=1e9,
795
  update_verbosity=None,
796
  print_precision=5,
797
  progress=True,
@@ -1033,13 +1033,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1033
 
1034
  if feature_names_in is None:
1035
  model.feature_names_in_ = np.array([f"x{i}" for i in range(n_features_in)])
1036
- model.pretty_feature_names_in_ = np.array(
1037
  [f"x{_subscriptify(i)}" for i in range(n_features_in)]
1038
  )
1039
  else:
1040
  assert len(feature_names_in) == n_features_in
1041
  model.feature_names_in_ = feature_names_in
1042
- model.pretty_feature_names_in_ = None
1043
 
1044
  if selection_mask is None:
1045
  model.selection_mask_ = np.ones(n_features_in, dtype=bool)
@@ -1313,7 +1313,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1313
  "constraints": {},
1314
  "multithreading": self.procs != 0 and self.cluster_manager is None,
1315
  "batch_size": 1,
1316
- "update_verbosity": self.verbosity,
1317
  "progress": buffer_available,
1318
  }
1319
  packed_modified_params = {}
@@ -1444,11 +1444,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1444
 
1445
  if self.feature_names_in_ is None:
1446
  self.feature_names_in_ = np.array([f"x{i}" for i in range(X.shape[1])])
1447
- self.pretty_feature_names_in_ = np.array(
1448
  [f"x{_subscriptify(i)}" for i in range(X.shape[1])]
1449
  )
1450
  else:
1451
- self.pretty_feature_names_in_ = None
1452
 
1453
  variable_names = self.feature_names_in_
1454
 
@@ -1537,7 +1537,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1537
  X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
1538
  # Update feature names with selected variable names
1539
  self.feature_names_in_ = _check_feature_names_in(self, variable_names)
1540
- self.pretty_feature_names_in_ = None
1541
  print(f"Using features {self.feature_names_in_}")
1542
 
1543
  # Denoising transformation
@@ -1729,7 +1729,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1729
  ncycles_per_iteration=self.ncyclesperiteration,
1730
  fraction_replaced=self.fraction_replaced,
1731
  topn=self.topn,
1732
- verbosity=self.verbosity,
1733
  print_precision=self.print_precision,
1734
  optimizer_algorithm=self.optimizer_algorithm,
1735
  optimizer_nrestarts=self.optimizer_nrestarts,
@@ -1737,7 +1736,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1737
  optimizer_iterations=self.optimizer_iterations,
1738
  perturbation_factor=self.perturbation_factor,
1739
  annealing=self.annealing,
1740
- progress=progress,
1741
  timeout_in_seconds=self.timeout_in_seconds,
1742
  crossover_probability=self.crossover_probability,
1743
  skip_mutation_failures=self.skip_mutation_failures,
@@ -1795,12 +1793,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1795
  Main.y,
1796
  weights=Main.weights,
1797
  niterations=int(self.niterations),
1798
- variable_names=(
1799
- self.pretty_feature_names_in_.tolist()
1800
- if hasattr(self, "pretty_feature_names_in_")
1801
- and self.pretty_feature_names_in_ is not None
1802
- else self.feature_names_in_.tolist()
1803
- ),
1804
  y_variable_names=y_variable_names,
1805
  X_units=self.X_units_,
1806
  y_units=self.y_units_,
@@ -1810,6 +1804,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
1810
  saved_state=self.raw_julia_state_,
1811
  return_state=True,
1812
  addprocs_function=cluster_manager,
 
 
1813
  )
1814
 
1815
  # Set attributes
@@ -2220,24 +2216,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
2220
  "Equation": "equation",
2221
  },
2222
  )
2223
- # Regexp replace x₁₂₃ to x123 in `equation`:
2224
- if (
2225
- hasattr(self, "pretty_feature_names_in_")
2226
- and self.pretty_feature_names_in_ is not None
2227
- ):
2228
- # df["equation"] = df["equation"].apply(_undo_subscriptify_full)
2229
- for pname, name in zip(
2230
- self.pretty_feature_names_in_, self.feature_names_in_
2231
- ):
2232
- df["equation"] = df["equation"].apply(
2233
- lambda s: re.sub(
2234
- r"\b" + f"({pname})" + r"\b",
2235
- name,
2236
- s,
2237
- )
2238
- if isinstance(s, str)
2239
- else s
2240
- )
2241
 
2242
  return df
2243
 
 
572
  Default is `False`.
573
  verbosity : int
574
  What verbosity level to use. 0 means minimal print statements.
575
+ Default is `1`.
576
  update_verbosity : int
577
  What verbosity level to use for package updates.
578
  Will take value of `verbosity` if not given.
 
661
  feature_names_in_ : ndarray of shape (`n_features_in_`,)
662
  Names of features seen during :term:`fit`. Defined only when `X`
663
  has feature names that are all strings.
664
+ display_feature_names_in_ : ndarray of shape (`n_features_in_`,)
665
  Pretty names of features, used only during printing.
666
  X_units_ : list[str] of length n_features
667
  Units of each variable in the training dataset, `X`.
 
791
  random_state=None,
792
  deterministic=False,
793
  warm_start=False,
794
+ verbosity=1,
795
  update_verbosity=None,
796
  print_precision=5,
797
  progress=True,
 
1033
 
1034
  if feature_names_in is None:
1035
  model.feature_names_in_ = np.array([f"x{i}" for i in range(n_features_in)])
1036
+ model.display_feature_names_in_ = np.array(
1037
  [f"x{_subscriptify(i)}" for i in range(n_features_in)]
1038
  )
1039
  else:
1040
  assert len(feature_names_in) == n_features_in
1041
  model.feature_names_in_ = feature_names_in
1042
+ model.display_feature_names_in_ = feature_names_in
1043
 
1044
  if selection_mask is None:
1045
  model.selection_mask_ = np.ones(n_features_in, dtype=bool)
 
1313
  "constraints": {},
1314
  "multithreading": self.procs != 0 and self.cluster_manager is None,
1315
  "batch_size": 1,
1316
+ "update_verbosity": int(self.verbosity),
1317
  "progress": buffer_available,
1318
  }
1319
  packed_modified_params = {}
 
1444
 
1445
  if self.feature_names_in_ is None:
1446
  self.feature_names_in_ = np.array([f"x{i}" for i in range(X.shape[1])])
1447
+ self.display_feature_names_in_ = np.array(
1448
  [f"x{_subscriptify(i)}" for i in range(X.shape[1])]
1449
  )
1450
  else:
1451
+ self.display_feature_names_in_ = self.feature_names_in_
1452
 
1453
  variable_names = self.feature_names_in_
1454
 
 
1537
  X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
1538
  # Update feature names with selected variable names
1539
  self.feature_names_in_ = _check_feature_names_in(self, variable_names)
1540
+ self.display_feature_names_in_ = self.feature_names_in_
1541
  print(f"Using features {self.feature_names_in_}")
1542
 
1543
  # Denoising transformation
 
1729
  ncycles_per_iteration=self.ncyclesperiteration,
1730
  fraction_replaced=self.fraction_replaced,
1731
  topn=self.topn,
 
1732
  print_precision=self.print_precision,
1733
  optimizer_algorithm=self.optimizer_algorithm,
1734
  optimizer_nrestarts=self.optimizer_nrestarts,
 
1736
  optimizer_iterations=self.optimizer_iterations,
1737
  perturbation_factor=self.perturbation_factor,
1738
  annealing=self.annealing,
 
1739
  timeout_in_seconds=self.timeout_in_seconds,
1740
  crossover_probability=self.crossover_probability,
1741
  skip_mutation_failures=self.skip_mutation_failures,
 
1793
  Main.y,
1794
  weights=Main.weights,
1795
  niterations=int(self.niterations),
1796
+ variable_names=self.feature_names_in_.tolist(),
1797
+ display_variable_names=self.display_feature_names_in_.tolist(),
 
 
 
 
1798
  y_variable_names=y_variable_names,
1799
  X_units=self.X_units_,
1800
  y_units=self.y_units_,
 
1804
  saved_state=self.raw_julia_state_,
1805
  return_state=True,
1806
  addprocs_function=cluster_manager,
1807
+ progress=progress and self.verbosity > 0 and len(y.shape) == 1,
1808
+ verbosity=int(self.verbosity),
1809
  )
1810
 
1811
  # Set attributes
 
2216
  "Equation": "equation",
2217
  },
2218
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2219
 
2220
  return df
2221
 
pysr/version.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = "0.15.2"
2
- __symbolic_regression_jl_version__ = "0.21.3"
 
1
+ __version__ = "0.15.3"
2
+ __symbolic_regression_jl_version__ = "0.21.5"