"""
Functions
---------
.. autosummary::
   :toctree: generated/

   fmin_l_bfgs_b

"""

import numpy as np
from numpy import array, asarray, float64, zeros

from . import _lbfgsb
from ._optimize import (MemoizeJac, OptimizeResult, _call_callback_maybe_halt,
                        _wrap_callback, _check_unknown_options,
                        _prepare_scalar_function)
from ._constraints import old_bound_to_new

from scipy.sparse.linalg import LinearOperator

__all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']
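
# Status and task codes reported by the C translation of the L-BFGS-B driver:
# task[0] holds the coarse solver state (status_messages) and task[1] the
# detailed stopping reason (task_messages); both are combined into the
# message returned to the caller.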
status_messages = {
    0: "START",
    1: "NEW_X",
    2: "RESTART",
    3: "FG",
    4: "CONVERGENCE",
    5: "STOP",
    6: "WARNING",
    7: "ERROR",
    8: "ABNORMAL"
}


task_messages = {
    0: "",
    301: "",
    302: "",
    401: "NORM OF PROJECTED GRADIENT <= PGTOL",
    402: "RELATIVE REDUCTION OF F <= FACTR*EPSMCH",
    501: "CPU EXCEEDING THE TIME LIMIT",
    502: "TOTAL NO. OF F,G EVALUATIONS EXCEEDS LIMIT",
    503: "PROJECTED GRADIENT IS SUFFICIENTLY SMALL",
    504: "TOTAL NO. OF ITERATIONS REACHED LIMIT",
    505: "CALLBACK REQUESTED HALT",
    601: "ROUNDING ERRORS PREVENT PROGRESS",
    602: "STP = STPMAX",
    603: "STP = STPMIN",
    604: "XTOL TEST SATISFIED",
    701: "NO FEASIBLE SOLUTION",
    702: "FACTR < 0",
    703: "FTOL < 0",
    704: "GTOL < 0",
    705: "XTOL < 0",
    706: "STP < STPMIN",
    707: "STP > STPMAX",
    708: "STPMIN < 0",
    709: "STPMAX < STPMIN",
    710: "INITIAL G >= 0",
    711: "M <= 0",
    712: "N <= 0",
    713: "INVALID NBD",
}


def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
                  approx_grad=0,
                  bounds=None, m=10, factr=1e7, pgtol=1e-5,
                  epsilon=1e-8,
                  iprint=-1, maxfun=15000, maxiter=15000, disp=None,
                  callback=None, maxls=20):
    """
    Minimize a function func using the L-BFGS-B algorithm.

    Parameters
    ----------
    func : callable f(x,*args)
        Function to minimize.
    x0 : ndarray
        Initial guess.
    fprime : callable fprime(x,*args), optional
        The gradient of `func`. If None, then `func` returns the function
        value and the gradient (``f, g = func(x, *args)``), unless
        `approx_grad` is True in which case `func` returns only ``f``.
    args : sequence, optional
        Arguments to pass to `func` and `fprime`.
    approx_grad : bool, optional
        Whether to approximate the gradient numerically (in which case
        `func` returns only the function value).
    bounds : list, optional
        ``(min, max)`` pairs for each element in ``x``, defining
        the bounds on that parameter. Use None or +-inf for one of ``min`` or
        ``max`` when there is no bound in that direction.
    m : int, optional
        The maximum number of variable metric corrections
        used to define the limited memory matrix. (The limited memory BFGS
        method does not store the full hessian but uses this many terms in an
        approximation to it.)
    factr : float, optional
        The iteration stops when
        ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
        where ``eps`` is the machine precision, which is automatically
        generated by the code. Typical values for `factr` are: 1e12 for
        low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
        high accuracy. See Notes for relationship to `ftol`, which is exposed
        (instead of `factr`) by the `scipy.optimize.minimize` interface to
        L-BFGS-B.
    pgtol : float, optional
        The iteration will stop when
        ``max{|proj g_i | i = 1, ..., n} <= pgtol``
        where ``proj g_i`` is the i-th component of the projected gradient.
    epsilon : float, optional
        Step size used when `approx_grad` is True, for numerically
        calculating the gradient.
    iprint : int, optional
        Deprecated option that previously controlled the text printed on the
        screen during the problem solution. Now the code does not emit any
        output and this keyword has no function.

        .. deprecated:: 1.15.0
            This keyword is deprecated and will be removed from SciPy 1.17.0.

    disp : int, optional
        Deprecated option that previously controlled the text printed on the
        screen during the problem solution. Now the code does not emit any
        output and this keyword has no function.

        .. deprecated:: 1.15.0
            This keyword is deprecated and will be removed from SciPy 1.17.0.

    maxfun : int, optional
        Maximum number of function evaluations. Note that this function
        may violate the limit because of evaluating gradients by numerical
        differentiation.
    maxiter : int, optional
        Maximum number of iterations.
    callback : callable, optional
        Called after each iteration, as ``callback(xk)``, where ``xk`` is the
        current parameter vector.
    maxls : int, optional
        Maximum number of line search steps (per iteration). Default is 20.

    Returns
    -------
    x : array_like
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    d : dict
        Information dictionary.

        * d['warnflag'] is

          - 0 if converged,
          - 1 if too many function evaluations or too many iterations,
          - 2 if stopped for another reason, given in d['task']

        * d['grad'] is the gradient at the minimum (should be 0 ish)
        * d['funcalls'] is the number of function calls made.
        * d['nit'] is the number of iterations.

    See also
    --------
    minimize: Interface to minimization algorithms for multivariate
        functions. See the 'L-BFGS-B' `method` in particular. Note that the
        `ftol` option is made available via that interface, while `factr` is
        provided via this interface, where `factr` is the factor multiplying
        the default machine floating-point precision to arrive at `ftol`:
        ``ftol = factr * numpy.finfo(float).eps``.

    Notes
    -----
    SciPy uses a C-translated and modified version of the Fortran code,
    L-BFGS-B v3.0 (released April 25, 2011, BSD-3 licensed). The original
    Fortran version was written by Ciyou Zhu, Richard Byrd, Jorge Nocedal
    and Jose Luis Morales.

    References
    ----------
    * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
      Constrained Optimization, (1995), SIAM Journal on Scientific and
      Statistical Computing, 16, 5, pp. 1190-1208.
    * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
      FORTRAN routines for large scale bound constrained optimization (1997),
      ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
    * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
      FORTRAN routines for large scale bound constrained optimization (2011),
      ACM Transactions on Mathematical Software, 38, 1.

    Examples
    --------
    Solve a linear regression problem via `fmin_l_bfgs_b`. To do this, first we
    define an objective function ``f(m, b) = (y - y_model)**2``, where `y`
    describes the observations and `y_model` the prediction of the linear model
    as ``y_model = m*x + b``. The bounds for the parameters, ``m`` and ``b``,
    are arbitrarily chosen as ``(0,5)`` and ``(5,10)`` for this example.

    >>> import numpy as np
    >>> from scipy.optimize import fmin_l_bfgs_b
    >>> X = np.arange(0, 10, 1)
    >>> M = 2
    >>> B = 3
    >>> Y = M * X + B
    >>> def func(parameters, *args):
    ...     x = args[0]
    ...     y = args[1]
    ...     m, b = parameters
    ...     y_model = m*x + b
    ...     error = sum(np.power((y - y_model), 2))
    ...     return error

    >>> initial_values = np.array([0.0, 1.0])

    >>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
    ...                                    approx_grad=True)
    >>> x_opt, f_opt
    (array([1.99999999, 3.00000006]), 1.7746231151323805e-14)  # may vary

    The optimized parameters in ``x_opt`` agree with the ground truth parameters
    ``m`` and ``b``. Next, let us perform a bound constrained optimization using
    the `bounds` parameter.

    >>> bounds = [(0, 5), (5, 10)]
    >>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
    ...                                    approx_grad=True, bounds=bounds)
    >>> x_opt, f_opt
    (array([1.65990508, 5.31649385]), 15.721334516453945)  # may vary
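
    The returned information dictionary records why the solver stopped;
    ``d['warnflag'] == 0`` indicates convergence:

    >>> info['warnflag']
    0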
|
""" |
    if approx_grad:
        fun = func
        jac = None
    elif fprime is None:
        fun = MemoizeJac(func)
        jac = fun.derivative
    else:
        fun = func
        jac = fprime

    callback = _wrap_callback(callback)
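
    # Map the legacy keyword names onto the options understood by
    # _minimize_lbfgsb; in particular, factr is converted to the ftol
    # termination tolerance via ftol = factr * machine epsilon.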
    opts = {'maxcor': m,
            'ftol': factr * np.finfo(float).eps,
            'gtol': pgtol,
            'eps': epsilon,
            'maxfun': maxfun,
            'maxiter': maxiter,
            'callback': callback,
            'maxls': maxls}

    res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
                           **opts)
    d = {'grad': res['jac'],
         'task': res['message'],
         'funcalls': res['nfev'],
         'nit': res['nit'],
         'warnflag': res['status']}
    f = res['fun']
    x = res['x']

    return x, f, d


def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
                     disp=None, maxcor=10, ftol=2.2204460492503131e-09,
                     gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
                     iprint=-1, callback=None, maxls=20,
                     finite_diff_rel_step=None, **unknown_options):
    """
    Minimize a scalar function of one or more variables using the L-BFGS-B
    algorithm.

    Options
    -------
    disp : None or int
        Deprecated option that previously controlled the text printed on the
        screen during the problem solution. Now the code does not emit any
        output and this keyword has no function.

        .. deprecated:: 1.15.0
            This keyword is deprecated and will be removed from SciPy 1.17.0.

    maxcor : int
        The maximum number of variable metric corrections used to
        define the limited memory matrix. (The limited memory BFGS
        method does not store the full hessian but uses this many terms
        in an approximation to it.)
    ftol : float
        The iteration stops when ``(f^k -
        f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
    gtol : float
        The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
        <= gtol`` where ``proj g_i`` is the i-th component of the
        projected gradient.
    eps : float or ndarray
        If `jac is None` the absolute step size used for numerical
        approximation of the jacobian via forward differences.
    maxfun : int
        Maximum number of function evaluations. Note that this function
        may violate the limit because of evaluating gradients by numerical
        differentiation.
    maxiter : int
        Maximum number of iterations.
    iprint : int, optional
        Deprecated option that previously controlled the text printed on the
        screen during the problem solution. Now the code does not emit any
        output and this keyword has no function.

        .. deprecated:: 1.15.0
            This keyword is deprecated and will be removed from SciPy 1.17.0.

    maxls : int, optional
        Maximum number of line search steps (per iteration). Default is 20.
    finite_diff_rel_step : None or array_like, optional
        If ``jac in ['2-point', '3-point', 'cs']`` the relative step size to
        use for numerical approximation of the jacobian. The absolute step
        size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
        possibly adjusted to fit into the bounds. For ``method='3-point'``
        the sign of `h` is ignored. If None (default) then step is selected
        automatically.

    Notes
    -----
    The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
    but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
    relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
    I.e., `factr` multiplies the default machine floating-point precision to
    arrive at `ftol`.

    """
    _check_unknown_options(unknown_options)
    m = maxcor
    pgtol = gtol
    factr = ftol / np.finfo(float).eps

    x0 = asarray(x0).ravel()
    n, = x0.shape
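
    # The public interfaces accept old-style (min, max) pairs; convert them
    # once to the new-style (2, n) array of lower/upper bounds used below.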
    if bounds is None:
        pass
    elif len(bounds) != n:
        raise ValueError('length of x0 != length of bounds')
    else:
        bounds = np.array(old_bound_to_new(bounds))

        # check bounds
        if (bounds[0] > bounds[1]).any():
            raise ValueError(
                "LBFGSB - one of the lower bounds is greater than an upper bound."
            )

        # the initial vector must lie within the bounds, otherwise ScalarFunction
        # and approx_derivative will cause problems
        x0 = np.clip(x0, bounds[0], bounds[1])

    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
                                  bounds=bounds,
                                  finite_diff_rel_step=finite_diff_rel_step)

    func_and_grad = sf.fun_and_grad

    # nbd[i] encodes the bound type of the i-th variable in the convention of
    # the underlying L-BFGS-B code: 0 = unbounded, 1 = lower bound only,
    # 2 = both lower and upper bounds, 3 = upper bound only.
    nbd = zeros(n, np.int32)
    low_bnd = zeros(n, float64)
    upper_bnd = zeros(n, float64)
    bounds_map = {(-np.inf, np.inf): 0,
                  (1, np.inf): 1,
                  (1, 1): 2,
                  (-np.inf, 1): 3}

    if bounds is not None:
        for i in range(n):
            L, U = bounds[0, i], bounds[1, i]
            if not np.isinf(L):
                low_bnd[i] = L
                L = 1
            if not np.isinf(U):
                upper_bnd[i] = U
                U = 1
            nbd[i] = bounds_map[L, U]

    if not maxls > 0:
        raise ValueError('maxls must be positive.')

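    # Working arrays required by the compiled routine; the length of wa,
    # 2*m*n + 5*n + 11*m*m + 8*m, is the workspace size documented for
    # L-BFGS-B v3.0.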
    x = array(x0, dtype=np.float64)
    f = array(0.0, dtype=np.float64)
    g = zeros((n,), dtype=np.float64)
    wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
    iwa = zeros(3*n, dtype=np.int32)
    task = zeros(2, dtype=np.int32)
    ln_task = zeros(2, dtype=np.int32)
    lsave = zeros(4, dtype=np.int32)
    isave = zeros(44, dtype=np.int32)
    dsave = zeros(29, dtype=float64)

    n_iterations = 0
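
    # Reverse-communication loop: each call to setulb either requests a fresh
    # evaluation of f and g (task[0] == 3, "FG"), announces a completed
    # iteration (task[0] == 1, "NEW_X"), or signals that the run is over
    # (convergence, stop, warning or error).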
    while True:
        # g may come back from the user's function in reduced precision; the
        # compiled routine expects float64, so upcast it.
        g = g.astype(np.float64)

        _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr, pgtol, wa,
                       iwa, task, lsave, isave, dsave, maxls, ln_task)

        if task[0] == 3:
            # The minimization routine wants f and g at the current x.
            # Overwrite f and g:
            f, g = func_and_grad(x)
        elif task[0] == 1:
            # A new iteration has completed.
            n_iterations += 1

            intermediate_result = OptimizeResult(x=x, fun=f)
            if _call_callback_maybe_halt(callback, intermediate_result):
                task[0] = 5
                task[1] = 505
            if n_iterations >= maxiter:
                task[0] = 5
                task[1] = 504
            elif sf.nfev > maxfun:
                task[0] = 5
                task[1] = 502
        else:
            break

    if task[0] == 4:
        warnflag = 0
    elif sf.nfev > maxfun or n_iterations >= maxiter:
        warnflag = 1
    else:
        warnflag = 2

    # The first two m*n segments of the workspace hold the stored correction
    # pairs: rows of s are the recent steps, rows of y the corresponding
    # gradient differences.
    s = wa[0: m*n].reshape(m, n)
    y = wa[m*n: 2*m*n].reshape(m, n)

    # isave[30] is the total number of BFGS updates performed so far
    n_bfgs_updates = isave[30]

    n_corrs = min(n_bfgs_updates, maxcor)
    hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])

    msg = status_messages[task[0]] + ": " + task_messages[task[1]]

    return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
                          njev=sf.ngev,
                          nit=n_iterations, status=warnflag, message=msg,
                          x=x, success=(warnflag == 0), hess_inv=hess_inv)


class LbfgsInvHessProduct(LinearOperator):
    """Linear operator for the L-BFGS approximate inverse Hessian.

    This operator computes the product of a vector with the approximate inverse
    of the Hessian of the objective function, using the L-BFGS limited
    memory approximation to the inverse Hessian, accumulated during the
    optimization.

    Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
    interface.

    Parameters
    ----------
    sk : array_like, shape=(n_corr, n)
        Array of `n_corr` most recent updates to the solution vector.
        (See [1]).
    yk : array_like, shape=(n_corr, n)
        Array of `n_corr` most recent updates to the gradient. (See [1]).
|
    References
    ----------
    .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
       storage." Mathematics of computation 35.151 (1980): 773-782.
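
    Examples
    --------
    Construct the operator from two stored correction pairs and apply it to a
    vector. The pairs below are arbitrary illustrative values rather than the
    output of an actual optimization run:

    >>> import numpy as np
    >>> from scipy.optimize import LbfgsInvHessProduct
    >>> sk = np.array([[1.0, 0.0], [0.0, 1.0]])
    >>> yk = np.array([[2.0, 0.0], [0.0, 3.0]])
    >>> hess_inv = LbfgsInvHessProduct(sk, yk)
    >>> hess_inv.dot(np.array([1.0, 1.0]))
    array([0.5       , 0.33333333])
    >>> hess_inv.todense()
    array([[0.5       , 0.        ],
           [0.        , 0.33333333]])
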
""" |

    def __init__(self, sk, yk):
        """Construct the operator."""
        if sk.shape != yk.shape or sk.ndim != 2:
            raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
        n_corrs, n = sk.shape

        super().__init__(dtype=np.float64, shape=(n, n))

        self.sk = sk
        self.yk = yk
        self.n_corrs = n_corrs
        # rho[i] = 1 / (y_i^T s_i), the curvature scalars of the BFGS update
        self.rho = 1 / np.einsum('ij,ij->i', sk, yk)
|
    def _matvec(self, x):
        """Efficient matrix-vector multiply with the BFGS matrices.

        This calculation is described in Section (4) of [1].

        Parameters
        ----------
        x : ndarray
            An array with shape (n,) or (n,1).

        Returns
        -------
        y : ndarray
            The matrix-vector product.

        """
        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
        q = np.array(x, dtype=self.dtype, copy=True)
        if q.ndim == 2 and q.shape[1] == 1:
            q = q.reshape(-1)

        alpha = np.empty(n_corrs)

        # First (backward) pass of the two-loop recursion
        for i in range(n_corrs-1, -1, -1):
            alpha[i] = rho[i] * np.dot(s[i], q)
            q = q - alpha[i]*y[i]

        # Second (forward) pass
        r = q
        for i in range(n_corrs):
            beta = rho[i] * np.dot(y[i], r)
            r = r + s[i] * (alpha[i] - beta)

        return r
|
    def todense(self):
        """Return a dense array representation of this operator.

        Returns
        -------
        arr : ndarray, shape=(n, n)
            An array with the same shape and containing
            the same data represented by this `LinearOperator`.

        """
        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
        I_arr = np.eye(*self.shape, dtype=self.dtype)
        Hk = I_arr

        # Recursive inverse-Hessian update:
        # H_{k+1} = (I - rho*s*y^T) H_k (I - rho*y*s^T) + rho*s*s^T
        for i in range(n_corrs):
            A1 = I_arr - s[i][:, np.newaxis] * y[i][np.newaxis, :] * rho[i]
            A2 = I_arr - y[i][:, np.newaxis] * s[i][np.newaxis, :] * rho[i]

            Hk = np.dot(A1, np.dot(Hk, A2)) + (rho[i] * s[i][:, np.newaxis] *
                                               s[i][np.newaxis, :])
        return Hk