Spaces:
Running
Running
tttc3
committed on
Commit
•
3e8d44d
1
Parent(s):
3821242
Add warm_start
Browse files - pysr/sr.py +35 -47
pysr/sr.py
CHANGED
@@ -177,7 +177,7 @@ def best_callable(*args, **kwargs): # pragma: no cover
|
|
177 |
VALID_OPTIMIZER_ALGORITHMS = ["NelderMead", "BFGS"]
|
178 |
|
179 |
|
180 |
-
class PySRRegressor(
|
181 |
"""
|
182 |
High-performance symbolic regression.
|
183 |
|
@@ -431,6 +431,10 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
431 |
Pass an int for reproducible results across multiple function calls.
|
432 |
See :term:`Glossary <random_state>`.
|
433 |
|
|
|
|
|
|
|
|
|
434 |
verbosity : int, default=1e9
|
435 |
What verbosity level to use. 0 means minimal print statements.
|
436 |
|
@@ -633,6 +637,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
633 |
fast_cycle=False,
|
634 |
precision=32,
|
635 |
random_state=None,
|
|
|
636 |
verbosity=1e9,
|
637 |
update_verbosity=None,
|
638 |
progress=True,
|
@@ -717,6 +722,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
717 |
self.fast_cycle = fast_cycle
|
718 |
self.precision = precision
|
719 |
self.random_state = random_state
|
|
|
720 |
# Additional runtime parameters
|
721 |
# - Runtime user interface
|
722 |
self.verbosity = verbosity
|
@@ -914,8 +920,11 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
914 |
if self.temp_equation_file:
|
915 |
self.equation_file_ = self.tempdir_ / "hall_of_fame.csv"
|
916 |
elif self.equation_file is None:
|
917 |
-
|
918 |
-
|
|
|
|
|
|
|
919 |
else:
|
920 |
self.equation_file_ = self.equation_file
|
921 |
|
@@ -1433,10 +1442,13 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1433 |
Fitted Estimator.
|
1434 |
"""
|
1435 |
# Init attributes that are not specified in BaseEstimator
|
1436 |
-
self.
|
1437 |
-
|
1438 |
-
|
1439 |
-
|
|
|
|
|
|
|
1440 |
|
1441 |
random_state = check_random_state(self.random_state) # For np random
|
1442 |
seed = random_state.get_state()[1][0] # For julia random
|
@@ -1510,31 +1522,35 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1510 |
self.equation_file_ = checkpoint_file
|
1511 |
self.equations_ = self.get_hof()
|
1512 |
|
1513 |
-
def
|
1514 |
"""
|
1515 |
-
|
1516 |
-
|
|
|
|
|
1517 |
|
1518 |
Parameters
|
1519 |
----------
|
1520 |
X : {ndarray | pandas.DataFrame} of shape (n_samples, n_features)
|
1521 |
-
|
1522 |
|
1523 |
-
|
1524 |
-
|
|
|
1525 |
|
1526 |
Returns
|
1527 |
-------
|
1528 |
-
y_predicted : ndarray of shape (n_samples,
|
1529 |
-
Values predicted by substituting `X` into the
|
1530 |
-
|
1531 |
|
1532 |
Raises
|
1533 |
------
|
1534 |
ValueError
|
1535 |
Raises if the `best_equation` cannot be evaluated.
|
1536 |
"""
|
1537 |
-
|
|
|
1538 |
|
1539 |
# When X is an numpy array or a pandas dataframe with a RangeIndex,
|
1540 |
# the self.feature_names_in_ generated during fit, for the same X,
|
@@ -1542,16 +1558,15 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1542 |
# To avoid this, convert X to a dataframe, apply the selection mask,
|
1543 |
# and then set the column/feature_names of X to be equal to those
|
1544 |
# generated during fit.
|
1545 |
-
if isinstance(X,
|
|
|
1546 |
X = pd.DataFrame(X)
|
1547 |
-
|
1548 |
if isinstance(X.columns, pd.RangeIndex):
|
1549 |
if self.selection_mask_ is not None:
|
1550 |
# RangeIndex enforces column order allowing columns to
|
1551 |
# be correctly filtered with self.selection_mask_
|
1552 |
X = X.iloc[:, self.selection_mask_]
|
1553 |
X.columns = self.feature_names_in_
|
1554 |
-
|
1555 |
# Without feature information, CallableEquation/lambda_format equations
|
1556 |
# require that the column order of X matches that of the X used during
|
1557 |
# the fitting process. _validate_data removes this feature information
|
@@ -1560,7 +1575,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1560 |
# reordered/reindexed to match those of the transformed (denoised and
|
1561 |
# feature selected) X in fit.
|
1562 |
X = X.reindex(columns=self.feature_names_in_)
|
1563 |
-
|
1564 |
X = self._validate_data(X, reset=False)
|
1565 |
|
1566 |
try:
|
@@ -1576,32 +1590,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1576 |
"e.g., `model.set_params(extra_sympy_mappings={'inv': lambda x: 1 / x})`."
|
1577 |
) from error
|
1578 |
|
1579 |
-
def predict(self, X, index=None):
|
1580 |
-
"""
|
1581 |
-
Predict y from input X using the equation chosen by `model_selection`.
|
1582 |
-
|
1583 |
-
You may see what equation is used by printing this object. X should
|
1584 |
-
have the same columns as the training data.
|
1585 |
-
|
1586 |
-
Parameters
|
1587 |
-
----------
|
1588 |
-
X : {ndarray | pandas.DataFrame} of shape (n_samples, n_features)
|
1589 |
-
Training data.
|
1590 |
-
|
1591 |
-
index : int, default=None
|
1592 |
-
If you want to compute the output of an expression using a
|
1593 |
-
particular row of `self.equations_`, you may specify the index here.
|
1594 |
-
|
1595 |
-
Returns
|
1596 |
-
-------
|
1597 |
-
y_predicted : ndarray of shape (n_samples, nout_)
|
1598 |
-
Values predicted by substituting `X` into the fitted symbolic
|
1599 |
-
regression model.
|
1600 |
-
"""
|
1601 |
-
self.refresh()
|
1602 |
-
best_equation = self.get_best(index=index)
|
1603 |
-
return self._decision_function(X, best_equation)
|
1604 |
-
|
1605 |
def sympy(self, index=None):
|
1606 |
"""
|
1607 |
Return sympy representation of the equation(s) chosen by `model_selection`.
|
|
|
177 |
VALID_OPTIMIZER_ALGORITHMS = ["NelderMead", "BFGS"]
|
178 |
|
179 |
|
180 |
+
class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
181 |
"""
|
182 |
High-performance symbolic regression.
|
183 |
|
|
|
431 |
Pass an int for reproducible results across multiple function calls.
|
432 |
See :term:`Glossary <random_state>`.
|
433 |
|
434 |
+
warm_start : bool, default=False
|
435 |
+
Tells fit to continue from where the last call to fit finished.
|
436 |
+
If false, each call to fit will be fresh, overwriting previous results.
|
437 |
+
|
438 |
verbosity : int, default=1e9
|
439 |
What verbosity level to use. 0 means minimal print statements.
|
440 |
|
|
|
637 |
fast_cycle=False,
|
638 |
precision=32,
|
639 |
random_state=None,
|
640 |
+
warm_start=False,
|
641 |
verbosity=1e9,
|
642 |
update_verbosity=None,
|
643 |
progress=True,
|
|
|
722 |
self.fast_cycle = fast_cycle
|
723 |
self.precision = precision
|
724 |
self.random_state = random_state
|
725 |
+
self.warm_start = warm_start
|
726 |
# Additional runtime parameters
|
727 |
# - Runtime user interface
|
728 |
self.verbosity = verbosity
|
|
|
920 |
if self.temp_equation_file:
|
921 |
self.equation_file_ = self.tempdir_ / "hall_of_fame.csv"
|
922 |
elif self.equation_file is None:
|
923 |
+
if self.warm_start and self.equation_file_:
|
924 |
+
pass
|
925 |
+
else:
|
926 |
+
date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
|
927 |
+
self.equation_file_ = "hall_of_fame_" + date_time + ".csv"
|
928 |
else:
|
929 |
self.equation_file_ = self.equation_file
|
930 |
|
|
|
1442 |
Fitted Estimator.
|
1443 |
"""
|
1444 |
# Init attributes that are not specified in BaseEstimator
|
1445 |
+
if self.warm_start and hasattr(self, "raw_julia_state_"):
|
1446 |
+
pass
|
1447 |
+
else:
|
1448 |
+
self.equations_ = None
|
1449 |
+
self.nout_ = 1
|
1450 |
+
self.selection_mask_ = None
|
1451 |
+
self.raw_julia_state_ = None
|
1452 |
|
1453 |
random_state = check_random_state(self.random_state) # For np random
|
1454 |
seed = random_state.get_state()[1][0] # For julia random
|
|
|
1522 |
self.equation_file_ = checkpoint_file
|
1523 |
self.equations_ = self.get_hof()
|
1524 |
|
1525 |
+
def predict(self, X, index=None):
|
1526 |
"""
|
1527 |
+
Predict y from input X using the equation chosen by `model_selection`.
|
1528 |
+
|
1529 |
+
You may see what equation is used by printing this object. X should
|
1530 |
+
have the same columns as the training data.
|
1531 |
|
1532 |
Parameters
|
1533 |
----------
|
1534 |
X : {ndarray | pandas.DataFrame} of shape (n_samples, n_features)
|
1535 |
+
Training data.
|
1536 |
|
1537 |
+
index : int, default=None
|
1538 |
+
If you want to compute the output of an expression using a
|
1539 |
+
particular row of `self.equations_`, you may specify the index here.
|
1540 |
|
1541 |
Returns
|
1542 |
-------
|
1543 |
+
y_predicted : ndarray of shape (n_samples, nout_)
|
1544 |
+
Values predicted by substituting `X` into the fitted symbolic
|
1545 |
+
regression model.
|
1546 |
|
1547 |
Raises
|
1548 |
------
|
1549 |
ValueError
|
1550 |
Raises if the `best_equation` cannot be evaluated.
|
1551 |
"""
|
1552 |
+
self.refresh()
|
1553 |
+
best_equation = self.get_best(index=index)
|
1554 |
|
1555 |
# When X is an numpy array or a pandas dataframe with a RangeIndex,
|
1556 |
# the self.feature_names_in_ generated during fit, for the same X,
|
|
|
1558 |
# To avoid this, convert X to a dataframe, apply the selection mask,
|
1559 |
# and then set the column/feature_names of X to be equal to those
|
1560 |
# generated during fit.
|
1561 |
+
if not isinstance(X, pd.DataFrame):
|
1562 |
+
X = check_array(X)
|
1563 |
X = pd.DataFrame(X)
|
|
|
1564 |
if isinstance(X.columns, pd.RangeIndex):
|
1565 |
if self.selection_mask_ is not None:
|
1566 |
# RangeIndex enforces column order allowing columns to
|
1567 |
# be correctly filtered with self.selection_mask_
|
1568 |
X = X.iloc[:, self.selection_mask_]
|
1569 |
X.columns = self.feature_names_in_
|
|
|
1570 |
# Without feature information, CallableEquation/lambda_format equations
|
1571 |
# require that the column order of X matches that of the X used during
|
1572 |
# the fitting process. _validate_data removes this feature information
|
|
|
1575 |
# reordered/reindexed to match those of the transformed (denoised and
|
1576 |
# feature selected) X in fit.
|
1577 |
X = X.reindex(columns=self.feature_names_in_)
|
|
|
1578 |
X = self._validate_data(X, reset=False)
|
1579 |
|
1580 |
try:
|
|
|
1590 |
"e.g., `model.set_params(extra_sympy_mappings={'inv': lambda x: 1 / x})`."
|
1591 |
) from error
|
1592 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1593 |
def sympy(self, index=None):
|
1594 |
"""
|
1595 |
Return sympy representation of the equation(s) chosen by `model_selection`.
|