Spaces:
Running
Running
MilesCranmer
committed on
Merge pull request #398 from MilesCranmer/use-display-variables
Browse files
- pysr/sr.py +13 -35
- pysr/version.py +2 -2
pysr/sr.py
CHANGED
@@ -572,7 +572,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
572 |
Default is `False`.
|
573 |
verbosity : int
|
574 |
What verbosity level to use. 0 means minimal print statements.
|
575 |
-
Default is `1e9`.
|
576 |
update_verbosity : int
|
577 |
What verbosity level to use for package updates.
|
578 |
Will take value of `verbosity` if not given.
|
@@ -661,7 +661,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
661 |
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
662 |
Names of features seen during :term:`fit`. Defined only when `X`
|
663 |
has feature names that are all strings.
|
664 |
-
pretty_feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
665 |
Pretty names of features, used only during printing.
|
666 |
X_units_ : list[str] of length n_features
|
667 |
Units of each variable in the training dataset, `X`.
|
@@ -791,7 +791,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
791 |
random_state=None,
|
792 |
deterministic=False,
|
793 |
warm_start=False,
|
794 |
-
verbosity=1e9,
|
795 |
update_verbosity=None,
|
796 |
print_precision=5,
|
797 |
progress=True,
|
@@ -1033,13 +1033,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1033 |
|
1034 |
if feature_names_in is None:
|
1035 |
model.feature_names_in_ = np.array([f"x{i}" for i in range(n_features_in)])
|
1036 |
-
model.pretty_feature_names_in_ = np.array(
|
1037 |
[f"x{_subscriptify(i)}" for i in range(n_features_in)]
|
1038 |
)
|
1039 |
else:
|
1040 |
assert len(feature_names_in) == n_features_in
|
1041 |
model.feature_names_in_ = feature_names_in
|
1042 |
-
model.pretty_feature_names_in_ = None
|
1043 |
|
1044 |
if selection_mask is None:
|
1045 |
model.selection_mask_ = np.ones(n_features_in, dtype=bool)
|
@@ -1313,7 +1313,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1313 |
"constraints": {},
|
1314 |
"multithreading": self.procs != 0 and self.cluster_manager is None,
|
1315 |
"batch_size": 1,
|
1316 |
-
"update_verbosity": self.verbosity,
|
1317 |
"progress": buffer_available,
|
1318 |
}
|
1319 |
packed_modified_params = {}
|
@@ -1444,11 +1444,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1444 |
|
1445 |
if self.feature_names_in_ is None:
|
1446 |
self.feature_names_in_ = np.array([f"x{i}" for i in range(X.shape[1])])
|
1447 |
-
self.pretty_feature_names_in_ = np.array(
|
1448 |
[f"x{_subscriptify(i)}" for i in range(X.shape[1])]
|
1449 |
)
|
1450 |
else:
|
1451 |
-
self.pretty_feature_names_in_ = None
|
1452 |
|
1453 |
variable_names = self.feature_names_in_
|
1454 |
|
@@ -1537,7 +1537,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1537 |
X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
|
1538 |
# Update feature names with selected variable names
|
1539 |
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
|
1540 |
-
self.pretty_feature_names_in_ = None
|
1541 |
print(f"Using features {self.feature_names_in_}")
|
1542 |
|
1543 |
# Denoising transformation
|
@@ -1729,7 +1729,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1729 |
ncycles_per_iteration=self.ncyclesperiteration,
|
1730 |
fraction_replaced=self.fraction_replaced,
|
1731 |
topn=self.topn,
|
1732 |
-
verbosity=self.verbosity,
|
1733 |
print_precision=self.print_precision,
|
1734 |
optimizer_algorithm=self.optimizer_algorithm,
|
1735 |
optimizer_nrestarts=self.optimizer_nrestarts,
|
@@ -1737,7 +1736,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1737 |
optimizer_iterations=self.optimizer_iterations,
|
1738 |
perturbation_factor=self.perturbation_factor,
|
1739 |
annealing=self.annealing,
|
1740 |
-
progress=progress,
|
1741 |
timeout_in_seconds=self.timeout_in_seconds,
|
1742 |
crossover_probability=self.crossover_probability,
|
1743 |
skip_mutation_failures=self.skip_mutation_failures,
|
@@ -1795,12 +1793,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1795 |
Main.y,
|
1796 |
weights=Main.weights,
|
1797 |
niterations=int(self.niterations),
|
1798 |
-
variable_names=(
|
1799 |
-
self.pretty_feature_names_in_.tolist()
|
1800 |
-
if hasattr(self, "pretty_feature_names_in_")
|
1801 |
-
and self.pretty_feature_names_in_ is not None
|
1802 |
-
else self.feature_names_in_.tolist()
|
1803 |
-
),
|
1804 |
y_variable_names=y_variable_names,
|
1805 |
X_units=self.X_units_,
|
1806 |
y_units=self.y_units_,
|
@@ -1810,6 +1804,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1810 |
saved_state=self.raw_julia_state_,
|
1811 |
return_state=True,
|
1812 |
addprocs_function=cluster_manager,
|
|
|
|
|
1813 |
)
|
1814 |
|
1815 |
# Set attributes
|
@@ -2220,24 +2216,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
2220 |
"Equation": "equation",
|
2221 |
},
|
2222 |
)
|
2223 |
-
# Regexp replace x₁₂₃ to x123 in `equation`:
|
2224 |
-
if (
|
2225 |
-
hasattr(self, "pretty_feature_names_in_")
|
2226 |
-
and self.pretty_feature_names_in_ is not None
|
2227 |
-
):
|
2228 |
-
# df["equation"] = df["equation"].apply(_undo_subscriptify_full)
|
2229 |
-
for pname, name in zip(
|
2230 |
-
self.pretty_feature_names_in_, self.feature_names_in_
|
2231 |
-
):
|
2232 |
-
df["equation"] = df["equation"].apply(
|
2233 |
-
lambda s: re.sub(
|
2234 |
-
r"\b" + f"({pname})" + r"\b",
|
2235 |
-
name,
|
2236 |
-
s,
|
2237 |
-
)
|
2238 |
-
if isinstance(s, str)
|
2239 |
-
else s
|
2240 |
-
)
|
2241 |
|
2242 |
return df
|
2243 |
|
|
|
572 |
Default is `False`.
|
573 |
verbosity : int
|
574 |
What verbosity level to use. 0 means minimal print statements.
|
575 |
+
Default is `1`.
|
576 |
update_verbosity : int
|
577 |
What verbosity level to use for package updates.
|
578 |
Will take value of `verbosity` if not given.
|
|
|
661 |
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
662 |
Names of features seen during :term:`fit`. Defined only when `X`
|
663 |
has feature names that are all strings.
|
664 |
+
display_feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
665 |
Pretty names of features, used only during printing.
|
666 |
X_units_ : list[str] of length n_features
|
667 |
Units of each variable in the training dataset, `X`.
|
|
|
791 |
random_state=None,
|
792 |
deterministic=False,
|
793 |
warm_start=False,
|
794 |
+
verbosity=1,
|
795 |
update_verbosity=None,
|
796 |
print_precision=5,
|
797 |
progress=True,
|
|
|
1033 |
|
1034 |
if feature_names_in is None:
|
1035 |
model.feature_names_in_ = np.array([f"x{i}" for i in range(n_features_in)])
|
1036 |
+
model.display_feature_names_in_ = np.array(
|
1037 |
[f"x{_subscriptify(i)}" for i in range(n_features_in)]
|
1038 |
)
|
1039 |
else:
|
1040 |
assert len(feature_names_in) == n_features_in
|
1041 |
model.feature_names_in_ = feature_names_in
|
1042 |
+
model.display_feature_names_in_ = feature_names_in
|
1043 |
|
1044 |
if selection_mask is None:
|
1045 |
model.selection_mask_ = np.ones(n_features_in, dtype=bool)
|
|
|
1313 |
"constraints": {},
|
1314 |
"multithreading": self.procs != 0 and self.cluster_manager is None,
|
1315 |
"batch_size": 1,
|
1316 |
+
"update_verbosity": int(self.verbosity),
|
1317 |
"progress": buffer_available,
|
1318 |
}
|
1319 |
packed_modified_params = {}
|
|
|
1444 |
|
1445 |
if self.feature_names_in_ is None:
|
1446 |
self.feature_names_in_ = np.array([f"x{i}" for i in range(X.shape[1])])
|
1447 |
+
self.display_feature_names_in_ = np.array(
|
1448 |
[f"x{_subscriptify(i)}" for i in range(X.shape[1])]
|
1449 |
)
|
1450 |
else:
|
1451 |
+
self.display_feature_names_in_ = self.feature_names_in_
|
1452 |
|
1453 |
variable_names = self.feature_names_in_
|
1454 |
|
|
|
1537 |
X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
|
1538 |
# Update feature names with selected variable names
|
1539 |
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
|
1540 |
+
self.display_feature_names_in_ = self.feature_names_in_
|
1541 |
print(f"Using features {self.feature_names_in_}")
|
1542 |
|
1543 |
# Denoising transformation
|
|
|
1729 |
ncycles_per_iteration=self.ncyclesperiteration,
|
1730 |
fraction_replaced=self.fraction_replaced,
|
1731 |
topn=self.topn,
|
|
|
1732 |
print_precision=self.print_precision,
|
1733 |
optimizer_algorithm=self.optimizer_algorithm,
|
1734 |
optimizer_nrestarts=self.optimizer_nrestarts,
|
|
|
1736 |
optimizer_iterations=self.optimizer_iterations,
|
1737 |
perturbation_factor=self.perturbation_factor,
|
1738 |
annealing=self.annealing,
|
|
|
1739 |
timeout_in_seconds=self.timeout_in_seconds,
|
1740 |
crossover_probability=self.crossover_probability,
|
1741 |
skip_mutation_failures=self.skip_mutation_failures,
|
|
|
1793 |
Main.y,
|
1794 |
weights=Main.weights,
|
1795 |
niterations=int(self.niterations),
|
1796 |
+
variable_names=self.feature_names_in_.tolist(),
|
1797 |
+
display_variable_names=self.display_feature_names_in_.tolist(),
|
|
|
|
|
|
|
|
|
1798 |
y_variable_names=y_variable_names,
|
1799 |
X_units=self.X_units_,
|
1800 |
y_units=self.y_units_,
|
|
|
1804 |
saved_state=self.raw_julia_state_,
|
1805 |
return_state=True,
|
1806 |
addprocs_function=cluster_manager,
|
1807 |
+
progress=progress and self.verbosity > 0 and len(y.shape) == 1,
|
1808 |
+
verbosity=int(self.verbosity),
|
1809 |
)
|
1810 |
|
1811 |
# Set attributes
|
|
|
2216 |
"Equation": "equation",
|
2217 |
},
|
2218 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2219 |
|
2220 |
return df
|
2221 |
|
pysr/version.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
__version__ = "0.15.
|
2 |
-
__symbolic_regression_jl_version__ = "0.21.
|
|
|
1 |
+
__version__ = "0.15.3"
|
2 |
+
__symbolic_regression_jl_version__ = "0.21.5"
|