"""Nearly exact trust-region optimization subproblem."""
import numpy as np
from scipy.linalg import (norm, get_lapack_funcs, solve_triangular,
cho_solve)
from ._trustregion import (_minimize_trust_region, BaseQuadraticSubproblem)
__all__ = ['_minimize_trustregion_exact',
'estimate_smallest_singular_value',
'singular_leading_submatrix',
'IterativeSubproblem']
def _minimize_trustregion_exact(fun, x0, args=(), jac=None, hess=None,
**trust_region_options):
"""
Minimization of scalar function of one or more variables using
a nearly exact trust-region algorithm.
Options
-------
initial_trust_radius : float
Initial trust-region radius.
max_trust_radius : float
Maximum value of the trust-region radius. No steps that are longer
than this value will be proposed.
eta : float
Acceptance stringency for proposed steps: a step is accepted only if
the ratio of actual to predicted reduction exceeds ``eta``.
gtol : float
Gradient norm must be less than ``gtol`` before successful
termination.
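Examples
--------
In practice this solver is selected through `scipy.optimize.minimize`
with ``method='trust-exact'`` (both the gradient and the Hessian are
required). A minimal run on the Rosenbrock function:

>>> import numpy as np
>>> from scipy.optimize import minimize, rosen, rosen_der, rosen_hess
>>> res = minimize(rosen, np.array([1.3, 0.7]), method='trust-exact',
...                jac=rosen_der, hess=rosen_hess)
>>> bool(res.success)
True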
"""
if jac is None:
raise ValueError('Jacobian is required for trust region '
'exact minimization.')
if not callable(hess):
raise ValueError('Hessian matrix is required for trust region '
'exact minimization.')
return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess,
subproblem=IterativeSubproblem,
**trust_region_options)
def estimate_smallest_singular_value(U):
"""Given upper triangular matrix ``U`` estimate the smallest singular
value and the correspondent right singular vector in O(n**2) operations.
Parameters
----------
U : ndarray
Square upper triangular matrix.
Returns
-------
s_min : float
Estimated smallest singular value of the provided matrix.
z_min : ndarray
Estimated right singular vector.
Notes
-----
The procedure is based on [1]_ and is done in two steps. First, it finds
a vector ``e`` with components selected from {+1, -1} such that the
solution ``w`` of the system ``U.T w = e`` is as large as possible.
Next it solves ``U v = w``. The smallest singular value is close
to ``norm(w)/norm(v)`` and the right singular vector is close
to ``v/norm(v)``.
The more ill-conditioned the matrix is, the better the estimate.
References
----------
.. [1] Cline, A. K., Moler, C. B., Stewart, G. W., Wilkinson, J. H.
An estimate for the condition number of a matrix. 1979.
SIAM Journal on Numerical Analysis, 16(2), 368-375.
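Examples
--------
A small illustration on an ill-conditioned triangular factor (the
private import path below is assumed). By construction,
``norm(U @ z_min)`` equals the returned ``s_min`` up to rounding:

>>> import numpy as np
>>> from scipy.linalg import cholesky, hilbert
>>> from scipy.optimize._trustregion_exact import (
...     estimate_smallest_singular_value)
>>> U = cholesky(hilbert(8))
>>> s_min, z_min = estimate_smallest_singular_value(U)
>>> bool(np.isclose(np.linalg.norm(U @ z_min), s_min))
True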
"""
U = np.atleast_2d(U)
m, n = U.shape
if m != n:
raise ValueError("A square triangular matrix should be provided.")
# A vector `e` with components selected from {+1, -1}
# is chosen so that the solution `w` to the system
# `U.T w = e` is as large as possible. Implementation
# based on algorithm 3.5.1, p. 142, of the reference cited
# below, adapted for a lower triangular system.
p = np.zeros(n)
w = np.empty(n)
# Implemented according to: Golub, G. H., Van Loan, C. F. (2013).
# "Matrix computations". Fourth Edition. JHU press. pp. 140-142.
for k in range(n):
wp = (1-p[k]) / U.T[k, k]
wm = (-1-p[k]) / U.T[k, k]
pp = p[k+1:] + U.T[k+1:, k]*wp
pm = p[k+1:] + U.T[k+1:, k]*wm
if abs(wp) + norm(pp, 1) >= abs(wm) + norm(pm, 1):
w[k] = wp
p[k+1:] = pp
else:
w[k] = wm
p[k+1:] = pm
# The system `U v = w` is solved using backward substitution.
v = solve_triangular(U, w)
v_norm = norm(v)
w_norm = norm(w)
# Smallest singular value
s_min = w_norm / v_norm
# Associated vector
z_min = v / v_norm
return s_min, z_min
def gershgorin_bounds(H):
"""
Given a square matrix ``H``, compute upper
and lower bounds for its eigenvalues (Gershgorin bounds).
Defined in ref. [1]_.
References
----------
.. [1] Conn, A. R., Gould, N. I., & Toint, P. L.
Trust region methods. 2000. Siam. pp. 19.
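Examples
--------
A quick illustration (the private import path below is assumed); the
interval ``[lb, ub]`` encloses every eigenvalue of ``H``:

>>> import numpy as np
>>> from scipy.optimize._trustregion_exact import gershgorin_bounds
>>> H = np.array([[2.0, -1.0, 0.0],
...               [-1.0, 2.0, -1.0],
...               [0.0, -1.0, 2.0]])
>>> lb, ub = gershgorin_bounds(H)
>>> eigs = np.linalg.eigvalsh(H)
>>> bool(lb <= eigs.min() <= eigs.max() <= ub)
True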
"""
H_diag = np.diag(H)
H_diag_abs = np.abs(H_diag)
H_row_sums = np.sum(np.abs(H), axis=1)
lb = np.min(H_diag + H_diag_abs - H_row_sums)
ub = np.max(H_diag - H_diag_abs + H_row_sums)
return lb, ub
def singular_leading_submatrix(A, U, k):
"""
Compute the term that makes the leading ``k`` by ``k``
submatrix of ``A`` singular.
Parameters
----------
A : ndarray
Symmetric matrix that is not positive definite.
U : ndarray
Upper triangular matrix resulting from an incomplete
Cholesky factorization of matrix ``A``.
k : int
Positive integer such that the leading ``k`` by ``k`` submatrix of
``A`` is the first non-positive-definite leading submatrix.
Returns
-------
delta : float
Amount that should be added to the element (k, k) of the
leading k by k submatrix of ``A`` to make it singular.
v : ndarray
A vector such that ``v.T B v = 0``, where ``B`` is the matrix ``A``
after ``delta`` is added to its element (k, k).
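Examples
--------
A sketch on a 2 by 2 indefinite matrix (the private import path below
is assumed): after ``delta`` is added to the element (k, k) the matrix
becomes singular and ``v.T B v = 0``:

>>> import numpy as np
>>> from scipy.linalg import get_lapack_funcs
>>> from scipy.optimize._trustregion_exact import singular_leading_submatrix
>>> A = np.array([[1.0, 2.0], [2.0, 1.0]])   # eigenvalues 3 and -1
>>> cholesky, = get_lapack_funcs(('potrf',), (A,))
>>> U, info = cholesky(A, lower=False, clean=True)
>>> int(info)   # the 2 by 2 leading submatrix is not positive definite
2
>>> delta, v = singular_leading_submatrix(A, U, info)
>>> B = A.copy()
>>> B[info-1, info-1] += delta
>>> bool(np.isclose(v @ B @ v, 0)), bool(np.isclose(np.linalg.det(B), 0))
(True, True)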
"""
# Compute delta
delta = np.sum(U[:k-1, k-1]**2) - A[k-1, k-1]
n = len(A)
# Initialize v
v = np.zeros(n)
v[k-1] = 1
# Compute the remaining values of v by solving a triangular system.
if k != 1:
v[:k-1] = solve_triangular(U[:k-1, :k-1], -U[:k-1, k-1])
return delta, v
class IterativeSubproblem(BaseQuadraticSubproblem):
"""Quadratic subproblem solved by nearly exact iterative method.
Notes
-----
This subproblem solver is based on [1]_, [2]_ and [3]_,
which describe similar algorithms. The implementation mostly
follows [1]_, but ideas from [2]_ and [3]_ are also used.
References
----------
.. [1] A.R. Conn, N.I. Gould, and P.L. Toint, "Trust region methods",
Siam, pp. 169-200, 2000.
.. [2] J. Nocedal and S. Wright, "Numerical optimization",
Springer Science & Business Media. pp. 83-91, 2006.
.. [3] J.J. More and D.C. Sorensen, "Computing a trust region step",
SIAM Journal on Scientific and Statistical Computing, vol. 4(3),
pp. 553-572, 1983.
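Examples
--------
A hand-rolled illustration on a convex quadratic model (normally the
class is instantiated by `_minimize_trust_region`; the private import
path below is assumed). The unconstrained minimizer of the model lies
outside the trust region, so the returned step hits the boundary:

>>> import numpy as np
>>> from scipy.optimize._trustregion_exact import IterativeSubproblem
>>> fun = lambda x: 0.5 * np.dot(x, x)
>>> jac = lambda x: x
>>> hess = lambda x: np.eye(len(x))
>>> subproblem = IterativeSubproblem(np.array([3.0, 4.0]), fun, jac, hess)
>>> p, hits_boundary = subproblem.solve(tr_radius=1.0)
>>> bool(np.allclose(p, [-0.6, -0.8])), hits_boundary
(True, True)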
"""
# UPDATE_COEFF appears in reference [1]_
# in formula 7.3.14 (p. 190), where it is named "theta".
# As recommended there, its value is fixed at 0.01.
UPDATE_COEFF = 0.01
EPS = np.finfo(float).eps
def __init__(self, x, fun, jac, hess, hessp=None,
k_easy=0.1, k_hard=0.2):
super().__init__(x, fun, jac, hess)
# When the trust region shrinks in two consecutive
# calculations (``tr_radius < previous_tr_radius``)
# the lower bound ``lambda_lb`` may be reused,
# speeding up convergence. To indicate that no
# previous value is known, ``previous_tr_radius``
# is initially set to -1 and ``lambda_lb`` to None.
self.previous_tr_radius = -1
self.lambda_lb = None
self.niter = 0
# ``k_easy`` and ``k_hard`` are parameters used
# to determine the stopping criteria of the iterative
# subproblem solver. Take a look at pp. 194-197
# of reference [1]_ for a more detailed description.
self.k_easy = k_easy
self.k_hard = k_hard
# Get the LAPACK function for the Cholesky factorization.
# The standard SciPy wrapper (``scipy.linalg.cholesky``) does
# not return the incomplete factorization needed by the method.
self.cholesky, = get_lapack_funcs(('potrf',), (self.hess,))
# Get info about Hessian
self.dimension = len(self.hess)
self.hess_gershgorin_lb,\
self.hess_gershgorin_ub = gershgorin_bounds(self.hess)
self.hess_inf = norm(self.hess, np.inf)
self.hess_fro = norm(self.hess, 'fro')
# A constant such that, for vectors with norm smaller than it,
# backward substitution is not reliable. It was established
# based on Golub, G. H., Van Loan, C. F. (2013).
# "Matrix computations". Fourth Edition. JHU press. p. 165.
self.CLOSE_TO_ZERO = self.dimension * self.EPS * self.hess_inf
def _initial_values(self, tr_radius):
"""Given a trust radius, return a good initial guess for
the damping factor, the lower bound and the upper bound.
The values were chosen accordingly to the guidelines on
section 7.3.8 (p. 192) from [1]_.
"""
# Upper bound for the damping factor
lambda_ub = max(0, self.jac_mag/tr_radius + min(-self.hess_gershgorin_lb,
self.hess_fro,
self.hess_inf))
# Lower bound for the damping factor
lambda_lb = max(0, -min(self.hess.diagonal()),
self.jac_mag/tr_radius - min(self.hess_gershgorin_ub,
self.hess_fro,
self.hess_inf))
# Improve bounds with previous info
if tr_radius < self.previous_tr_radius:
lambda_lb = max(self.lambda_lb, lambda_lb)
# Initial guess for the damping factor
if lambda_lb == 0:
lambda_initial = 0
else:
lambda_initial = max(np.sqrt(lambda_lb * lambda_ub),
lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
return lambda_initial, lambda_lb, lambda_ub
def solve(self, tr_radius):
"""Solve quadratic subproblem"""
lambda_current, lambda_lb, lambda_ub = self._initial_values(tr_radius)
n = self.dimension
hits_boundary = True
already_factorized = False
self.niter = 0
while True:
# Compute Cholesky factorization
if already_factorized:
already_factorized = False
else:
H = self.hess+lambda_current*np.eye(n)
U, info = self.cholesky(H, lower=False,
overwrite_a=False,
clean=True)
self.niter += 1
# Check if factorization succeeded
if info == 0 and self.jac_mag > self.CLOSE_TO_ZERO:
# Successful factorization
# Solve `U.T U p = -g`
p = cho_solve((U, False), -self.jac)
p_norm = norm(p)
# Check for interior convergence
if p_norm <= tr_radius and lambda_current == 0:
hits_boundary = False
break
# Solve `U.T w = p`
w = solve_triangular(U, p, trans='T')
w_norm = norm(w)
# Compute the Newton step according to
# formula (4.44), p. 87, of ref [2]_.
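# The update is a Newton iteration on the secular equation
# ``phi(lambda) = 1/||p(lambda)|| - 1/tr_radius = 0``: with
# ``H + lambda*I = U.T U`` and ``U.T w = p`` one has
# ``phi'(lambda) = ||w||**2 / ||p||**3``, which gives the
# step below.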
delta_lambda = (p_norm/w_norm)**2 * (p_norm-tr_radius)/tr_radius
lambda_new = lambda_current + delta_lambda
if p_norm < tr_radius: # Inside boundary
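# ``lambda_current`` is too large: the step lies strictly
# inside the trust region, which happens in particular in
# the so-called hard case (see refs [1]_ and [3]_). Try to
# complete the step by moving from ``p`` along ``z_min``, an
# approximate eigenvector of the smallest eigenvalue of
# ``H + lambda_current*I``, until the boundary is reached.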
s_min, z_min = estimate_smallest_singular_value(U)
ta, tb = self.get_boundaries_intersections(p, z_min,
tr_radius)
# Choose `step_len` with the smallest magnitude.
# The reason for this choice is explained in
# ref [3]_, p. 6 (immediately before the formula
# for `tau`).
step_len = min([ta, tb], key=abs)
# Compute the quadratic term (p.T*H*p)
quadratic_term = np.dot(p, np.dot(H, p))
# Check stop criteria
relative_error = ((step_len**2 * s_min**2)
/ (quadratic_term + lambda_current*tr_radius**2))
if relative_error <= self.k_hard:
p += step_len * z_min
break
# Update uncertainty bounds
lambda_ub = lambda_current
lambda_lb = max(lambda_lb, lambda_current - s_min**2)
# Compute Cholesky factorization
H = self.hess + lambda_new*np.eye(n)
c, info = self.cholesky(H, lower=False,
overwrite_a=False,
clean=True)
# Check if the factorization has succeeded
if info == 0: # Successful factorization
# Update damping factor
lambda_current = lambda_new
already_factorized = True
else: # Unsuccessful factorization
# Update uncertainty bounds
lambda_lb = max(lambda_lb, lambda_new)
# Update damping factor
lambda_current = max(
np.sqrt(lambda_lb * lambda_ub),
lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)
)
else: # Outside boundary
# Check stop criteria
relative_error = abs(p_norm - tr_radius) / tr_radius
if relative_error <= self.k_easy:
break
# Update uncertainty bounds
lambda_lb = lambda_current
# Update damping factor
lambda_current = lambda_new
elif info == 0 and self.jac_mag <= self.CLOSE_TO_ZERO:
# jac_mag very close to zero
# Check for interior convergence
if lambda_current == 0:
p = np.zeros(n)
hits_boundary = False
break
s_min, z_min = estimate_smallest_singular_value(U)
step_len = tr_radius
# Check stop criteria
if (step_len**2 * s_min**2
<= self.k_hard * lambda_current * tr_radius**2):
p = step_len * z_min
break
# Update uncertainty bounds
lambda_ub = lambda_current
lambda_lb = max(lambda_lb, lambda_current - s_min**2)
# Update damping factor
lambda_current = max(
np.sqrt(lambda_lb * lambda_ub),
lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)
)
else: # Unsuccessful factorization
# Compute auxiliary terms
delta, v = singular_leading_submatrix(H, U, info)
v_norm = norm(v)
# Update uncertainty interval
lambda_lb = max(lambda_lb, lambda_current + delta/v_norm**2)
# Update damping factor
lambda_current = max(
np.sqrt(lambda_lb * lambda_ub),
lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)
)
self.lambda_lb = lambda_lb
self.lambda_current = lambda_current
self.previous_tr_radius = tr_radius
return p, hits_boundary