Spaces:
Running
Running
MilesCranmer
committed on
Commit
•
e5a9067
1
Parent(s):
117b2c3
Create pretty variable names for print outs
Browse files- pysr/sr.py +37 -31
pysr/sr.py
CHANGED
@@ -633,8 +633,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
633 |
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
634 |
Names of features seen during :term:`fit`. Defined only when `X`
|
635 |
has feature names that are all strings.
|
636 |
-
|
637 |
-
|
638 |
nout_ : int
|
639 |
Number of output dimensions.
|
640 |
selection_mask_ : list[int] of length `select_k_features`
|
@@ -997,12 +997,14 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
997 |
model.n_features_in_ = n_features_in
|
998 |
|
999 |
if feature_names_in is None:
|
1000 |
-
model.feature_names_in_ = [f"x{
|
1001 |
-
model.
|
|
|
|
|
1002 |
else:
|
1003 |
assert len(feature_names_in) == n_features_in
|
1004 |
model.feature_names_in_ = feature_names_in
|
1005 |
-
model.
|
1006 |
|
1007 |
if selection_mask is None:
|
1008 |
model.selection_mask_ = np.ones(n_features_in, dtype=bool)
|
@@ -1388,17 +1390,17 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1388 |
weights = check_array(weights, ensure_2d=False)
|
1389 |
check_consistent_length(weights, y)
|
1390 |
X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
|
1391 |
-
feature_names_in_ = _check_feature_names_in(
|
1392 |
-
|
1393 |
-
|
1394 |
-
|
1395 |
-
|
1396 |
-
|
1397 |
-
|
1398 |
-
|
|
|
1399 |
else:
|
1400 |
-
self.
|
1401 |
-
self.is_default_feature_names_ = False
|
1402 |
|
1403 |
variable_names = self.feature_names_in_
|
1404 |
|
@@ -1721,7 +1723,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
1721 |
Main.y,
|
1722 |
weights=Main.weights,
|
1723 |
niterations=int(self.niterations),
|
1724 |
-
variable_names=
|
|
|
|
|
|
|
|
|
1725 |
options=options,
|
1726 |
numprocs=cprocs,
|
1727 |
parallelism=parallelism,
|
@@ -2098,9 +2104,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
2098 |
with open(filename, "r") as f:
|
2099 |
buf = f.read()
|
2100 |
buf = _preprocess_julia_floats(buf)
|
2101 |
-
all_outputs = [
|
2102 |
-
self._postprocess_dataframe(pd.read_csv(StringIO(buf)))
|
2103 |
-
]
|
2104 |
|
2105 |
except FileNotFoundError:
|
2106 |
raise RuntimeError(
|
@@ -2118,14 +2122,23 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
2118 |
},
|
2119 |
)
|
2120 |
# Regexp replace x₁₂₃ to x123 in `equation`:
|
2121 |
-
if self.
|
2122 |
-
df["equation"] = df["equation"].apply(
|
2123 |
-
|
2124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2125 |
|
2126 |
return df
|
2127 |
|
2128 |
-
|
2129 |
def get_hof(self):
|
2130 |
"""Get the equations from a hall of fame file.
|
2131 |
|
@@ -2434,10 +2447,3 @@ def _subscriptify(i: int) -> str:
|
|
2434 |
For example, 123 -> "₁₂₃".
|
2435 |
"""
|
2436 |
return "".join([chr(0x2080 + int(c)) for c in str(i)])
|
2437 |
-
|
2438 |
-
def _undo_subscriptify(s: str) -> int:
|
2439 |
-
"""Converts subscript text form to integer.
|
2440 |
-
|
2441 |
-
For example, "₁₂₃" -> 123.
|
2442 |
-
"""
|
2443 |
-
return int("".join([str(ord(c) - 0x2080) for c in s]))
|
|
|
633 |
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
634 |
Names of features seen during :term:`fit`. Defined only when `X`
|
635 |
has feature names that are all strings.
|
636 |
+
pretty_feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
637 |
+
Pretty names of features, used only during printing.
|
638 |
nout_ : int
|
639 |
Number of output dimensions.
|
640 |
selection_mask_ : list[int] of length `select_k_features`
|
|
|
997 |
model.n_features_in_ = n_features_in
|
998 |
|
999 |
if feature_names_in is None:
|
1000 |
+
model.feature_names_in_ = [f"x{i}" for i in range(n_features_in)]
|
1001 |
+
model.pretty_feature_names_in_ = [
|
1002 |
+
f"x{_subscriptify(i)}" for i in range(n_features_in)
|
1003 |
+
]
|
1004 |
else:
|
1005 |
assert len(feature_names_in) == n_features_in
|
1006 |
model.feature_names_in_ = feature_names_in
|
1007 |
+
model.pretty_feature_names_in_ = None
|
1008 |
|
1009 |
if selection_mask is None:
|
1010 |
model.selection_mask_ = np.ones(n_features_in, dtype=bool)
|
|
|
1390 |
weights = check_array(weights, ensure_2d=False)
|
1391 |
check_consistent_length(weights, y)
|
1392 |
X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
|
1393 |
+
self.feature_names_in_ = _check_feature_names_in(
|
1394 |
+
self, variable_names, generate_names=False
|
1395 |
+
)
|
1396 |
+
|
1397 |
+
if self.feature_names_in_ is None:
|
1398 |
+
self.feature_names_in_ = [f"x{i}" for i in range(X.shape[1])]
|
1399 |
+
self.pretty_feature_names_in_ = [
|
1400 |
+
f"x{_subscriptify(i)}" for i in range(X.shape[1])
|
1401 |
+
]
|
1402 |
else:
|
1403 |
+
self.pretty_feature_names_in_ = None
|
|
|
1404 |
|
1405 |
variable_names = self.feature_names_in_
|
1406 |
|
|
|
1723 |
Main.y,
|
1724 |
weights=Main.weights,
|
1725 |
niterations=int(self.niterations),
|
1726 |
+
variable_names=(
|
1727 |
+
self.pretty_feature_names_in_
|
1728 |
+
if self.pretty_feature_names_in_ is not None
|
1729 |
+
else self.feature_names_in_
|
1730 |
+
),
|
1731 |
options=options,
|
1732 |
numprocs=cprocs,
|
1733 |
parallelism=parallelism,
|
|
|
2104 |
with open(filename, "r") as f:
|
2105 |
buf = f.read()
|
2106 |
buf = _preprocess_julia_floats(buf)
|
2107 |
+
all_outputs = [self._postprocess_dataframe(pd.read_csv(StringIO(buf)))]
|
|
|
|
|
2108 |
|
2109 |
except FileNotFoundError:
|
2110 |
raise RuntimeError(
|
|
|
2122 |
},
|
2123 |
)
|
2124 |
# Regexp replace x₁₂₃ to x123 in `equation`:
|
2125 |
+
if self.pretty_feature_names_in_ is not None:
|
2126 |
+
# df["equation"] = df["equation"].apply(_undo_subscriptify_full)
|
2127 |
+
for pname, name in zip(
|
2128 |
+
self.pretty_feature_names_in_, self.feature_names_in_
|
2129 |
+
):
|
2130 |
+
df["equation"] = df["equation"].apply(
|
2131 |
+
lambda s: re.sub(
|
2132 |
+
r"\b" + f"({pname})" + r"\b",
|
2133 |
+
name,
|
2134 |
+
s,
|
2135 |
+
)
|
2136 |
+
if isinstance(s, str)
|
2137 |
+
else s
|
2138 |
+
)
|
2139 |
|
2140 |
return df
|
2141 |
|
|
|
2142 |
def get_hof(self):
|
2143 |
"""Get the equations from a hall of fame file.
|
2144 |
|
|
|
2447 |
For example, 123 -> "₁₂₃".
|
2448 |
"""
|
2449 |
return "".join([chr(0x2080 + int(c)) for c in str(i)])
|
|
|
|
|
|
|
|
|
|
|
|
|
|