"""Trust-region optimization."""
import math
import warnings
import numpy as np
import scipy.linalg
from ._optimize import (_check_unknown_options, _status_message,
OptimizeResult, _prepare_scalar_function,
_call_callback_maybe_halt)
from scipy.optimize._hessian_update_strategy import HessianUpdateStrategy
from scipy.optimize._differentiable_functions import FD_METHODS
__all__ = []
def _wrap_function(function, args):
    # Wraps a minimizer function to count the number of evaluations
    # and to conveniently provide an ``args`` keyword.
ncalls = [0]
if function is None:
return ncalls, None
def function_wrapper(x, *wrapper_args):
ncalls[0] += 1
# A copy of x is sent to the user function (gh13740)
return function(np.copy(x), *(wrapper_args + args))
return ncalls, function_wrapper
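
# A minimal usage sketch (illustrative only; the lambda and values are
# assumed, not part of this module): the shared ``ncalls`` list lets a
# caller read the evaluation count even after the wrapper has been handed
# to an optimizer.
#
#     ncalls, f = _wrap_function(lambda x, a: np.dot(x, x) + a, (1.0,))
#     f(np.array([1.0, 2.0]))   # evaluates to 6.0
#     ncalls[0]                 # -> 1
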
class BaseQuadraticSubproblem:
"""
Base/abstract class defining the quadratic model for trust-region
minimization. Child classes must implement the ``solve`` method.
Values of the objective function, Jacobian and Hessian (if provided) at
the current iterate ``x`` are evaluated on demand and then stored as
attributes ``fun``, ``jac``, ``hess``.
"""
def __init__(self, x, fun, jac, hess=None, hessp=None):
self._x = x
self._f = None
self._g = None
self._h = None
self._g_mag = None
self._cauchy_point = None
self._newton_point = None
self._fun = fun
self._jac = jac
self._hess = hess
self._hessp = hessp
    def __call__(self, p):
        """Evaluate the quadratic model at the step ``p``, i.e.
        ``fun + jac.dot(p) + 0.5 * p.dot(hessp(p))``."""
        return self.fun + np.dot(self.jac, p) + 0.5 * np.dot(p, self.hessp(p))
@property
def fun(self):
"""Value of objective function at current iteration."""
if self._f is None:
self._f = self._fun(self._x)
return self._f
@property
def jac(self):
"""Value of Jacobian of objective function at current iteration."""
if self._g is None:
self._g = self._jac(self._x)
return self._g
@property
def hess(self):
"""Value of Hessian of objective function at current iteration."""
if self._h is None:
self._h = self._hess(self._x)
return self._h
    def hessp(self, p):
        """Hessian-vector product at the current iterate, using the
        user-supplied ``hessp`` when available and a dense matrix-vector
        product with the Hessian otherwise."""
        if self._hessp is not None:
            return self._hessp(self._x, p)
        else:
            return np.dot(self.hess, p)
@property
def jac_mag(self):
"""Magnitude of jacobian of objective function at current iteration."""
if self._g_mag is None:
self._g_mag = scipy.linalg.norm(self.jac)
return self._g_mag
def get_boundaries_intersections(self, z, d, trust_radius):
"""
Solve the scalar quadratic equation ``||z + t d|| == trust_radius``.
This is like a line-sphere intersection.
Return the two values of t, sorted from low to high.
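
        Examples
        --------
        A minimal sketch (values chosen purely for illustration): starting
        from the origin along a unit direction, the sphere of radius 2 is
        crossed at ``t = -2`` and ``t = 2``.

        >>> import numpy as np
        >>> sub = BaseQuadraticSubproblem(np.zeros(2), None, None)
        >>> ts = sub.get_boundaries_intersections(np.zeros(2),
        ...                                       np.array([1.0, 0.0]), 2.0)
        >>> [float(t) for t in ts]
        [-2.0, 2.0]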
"""
a = np.dot(d, d)
b = 2 * np.dot(z, d)
c = np.dot(z, z) - trust_radius**2
sqrt_discriminant = math.sqrt(b*b - 4*a*c)
        # The following calculation is mathematically
        # equivalent to:
        #   ta = (-b - sqrt_discriminant) / (2*a)
        #   tb = (-b + sqrt_discriminant) / (2*a)
        # but produces smaller round-off errors. See
        # Golub and Van Loan, "Matrix Computations", p. 97,
        # for a fuller justification.
aux = b + math.copysign(sqrt_discriminant, b)
ta = -aux / (2*a)
tb = -2*c / aux
return sorted([ta, tb])
def solve(self, trust_radius):
raise NotImplementedError('The solve method should be implemented by '
'the child class')
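
# The subclass below is an illustrative sketch only (it is not one of the
# solvers SciPy ships): it implements ``solve`` with the Cauchy point, the
# minimizer of the quadratic model along the steepest-descent direction,
# clipped to the trust-region boundary.
class _CauchyPointSubproblem(BaseQuadraticSubproblem):
    """Toy subproblem solver returning the Cauchy point."""
    def solve(self, trust_radius):
        g = self.jac
        g_norm = self.jac_mag
        curvature = np.dot(g, self.hessp(g))  # g^T B g
        # Step length at which ``-t*g`` reaches the boundary.
        t_boundary = trust_radius / g_norm
        if curvature > 0:
            # Unconstrained minimizer of the 1-D model, clipped to the
            # boundary.
            t = min(g_norm**2 / curvature, t_boundary)
        else:
            # Nonpositive curvature: the model decreases all the way out.
            t = t_boundary
        p = -t * g
        hits_boundary = t >= t_boundary
        return p, hits_boundary
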
def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,
subproblem=None, initial_trust_radius=1.0,
max_trust_radius=1000.0, eta=0.15, gtol=1e-4,
maxiter=None, disp=False, return_all=False,
callback=None, inexact=True, **unknown_options):
"""
    Minimization of a scalar function of one or more variables using a
    trust-region algorithm.
Options for the trust-region algorithm are:
initial_trust_radius : float
Initial trust radius.
max_trust_radius : float
Never propose steps that are longer than this value.
    eta : float
        Acceptance threshold for proposed steps: a step is accepted only
        if the ratio of actual to predicted reduction exceeds ``eta``.
        Must satisfy ``0 <= eta < 0.25``.
gtol : float
Gradient norm must be less than `gtol`
before successful termination.
maxiter : int
Maximum number of iterations to perform.
disp : bool
If True, print convergence message.
    inexact : bool
        Accuracy with which to solve the subproblems. If True, typically
        fewer nonlinear iterations are needed at the cost of more
        Hessian-vector products. Only effective for the trust-krylov
        method.
This function is called by the `minimize` function.
It is not supposed to be called directly.
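
    Examples
    --------
    This routine is reached through `scipy.optimize.minimize` rather than
    called directly, for example::

        from scipy.optimize import minimize, rosen, rosen_der, rosen_hess
        res = minimize(rosen, [1.3, 0.7], method='trust-ncg',
                       jac=rosen_der, hess=rosen_hess)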
"""
_check_unknown_options(unknown_options)
if jac is None:
raise ValueError('Jacobian is currently required for trust-region '
'methods')
if hess is None and hessp is None:
raise ValueError('Either the Hessian or the Hessian-vector product '
'is currently required for trust-region methods')
if subproblem is None:
raise ValueError('A subproblem solving strategy is required for '
'trust-region methods')
    if not (0 <= eta < 0.25):
        raise ValueError('invalid acceptance stringency')
    if max_trust_radius <= 0:
        raise ValueError('the max trust radius must be positive')
if initial_trust_radius <= 0:
raise ValueError('the initial trust radius must be positive')
if initial_trust_radius >= max_trust_radius:
raise ValueError('the initial trust radius must be less than the '
'max trust radius')
# force the initial guess into a nice format
x0 = np.asarray(x0).flatten()
# A ScalarFunction representing the problem. This caches calls to fun, jac,
# hess.
sf = _prepare_scalar_function(fun, x0, jac=jac, hess=hess, args=args)
fun = sf.fun
jac = sf.grad
if callable(hess):
hess = sf.hess
elif callable(hessp):
# this elif statement must come before examining whether hess
# is estimated by FD methods or a HessianUpdateStrategy
pass
elif (hess in FD_METHODS or isinstance(hess, HessianUpdateStrategy)):
# If the Hessian is being estimated by finite differences or a
# Hessian update strategy then ScalarFunction.hess returns a
# LinearOperator or a HessianUpdateStrategy. This enables the
# calculation/creation of a hessp. BUT you only want to do this
# if the user *hasn't* provided a callable(hessp) function.
hess = None
def hessp(x, p, *args):
return sf.hess(x).dot(p)
else:
raise ValueError('Either the Hessian or the Hessian-vector product '
'is currently required for trust-region methods')
# ScalarFunction doesn't represent hessp
nhessp, hessp = _wrap_function(hessp, args)
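    # Illustrative aside (the starting point is assumed; the names are real
    # scipy.optimize API): the quasi-Newton branch above is exercised when
    # ``minimize`` receives a HessianUpdateStrategy instead of an exact
    # Hessian, e.g.
    #
    #     from scipy.optimize import minimize, rosen, rosen_der, SR1
    #     res = minimize(rosen, [1.3, 0.7], method='trust-ncg',
    #                    jac=rosen_der, hess=SR1())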
# limit the number of iterations
if maxiter is None:
maxiter = len(x0)*200
# init the search status
warnflag = 0
# initialize the search
trust_radius = initial_trust_radius
x = x0
if return_all:
allvecs = [x]
m = subproblem(x, fun, jac, hess, hessp)
k = 0
# search for the function min
# do not even start if the gradient is small enough
while m.jac_mag >= gtol:
# Solve the sub-problem.
# This gives us the proposed step relative to the current position
# and it tells us whether the proposed step
# has reached the trust region boundary or not.
try:
p, hits_boundary = m.solve(trust_radius)
except np.linalg.LinAlgError:
warnflag = 3
break
# calculate the predicted value at the proposed point
predicted_value = m(p)
# define the local approximation at the proposed point
x_proposed = x + p
m_proposed = subproblem(x_proposed, fun, jac, hess, hessp)
        # evaluate the reduction ratio rho defined in equation (4.4) of
        # Nocedal and Wright, "Numerical Optimization"
actual_reduction = m.fun - m_proposed.fun
predicted_reduction = m.fun - predicted_value
if predicted_reduction <= 0:
warnflag = 2
break
rho = actual_reduction / predicted_reduction
# update the trust radius according to the actual/predicted ratio
if rho < 0.25:
trust_radius *= 0.25
elif rho > 0.75 and hits_boundary:
trust_radius = min(2*trust_radius, max_trust_radius)
# if the ratio is high enough then accept the proposed step
if rho > eta:
x = x_proposed
m = m_proposed
# append the best guess, call back, increment the iteration count
if return_all:
allvecs.append(np.copy(x))
k += 1
intermediate_result = OptimizeResult(x=x, fun=m.fun)
if _call_callback_maybe_halt(callback, intermediate_result):
break
# check if the gradient is small enough to stop
if m.jac_mag < gtol:
warnflag = 0
break
# check if we have looked at enough iterations
if k >= maxiter:
warnflag = 1
break
    # report convergence details if requested
status_messages = (
_status_message['success'],
_status_message['maxiter'],
'A bad approximation caused failure to predict improvement.',
'A linalg error occurred, such as a non-psd Hessian.',
)
if disp:
if warnflag == 0:
print(status_messages[warnflag])
else:
warnings.warn(status_messages[warnflag], RuntimeWarning, stacklevel=3)
print(f" Current function value: {m.fun:f}")
print(" Iterations: %d" % k)
print(" Function evaluations: %d" % sf.nfev)
print(" Gradient evaluations: %d" % sf.ngev)
print(" Hessian evaluations: %d" % (sf.nhev + nhessp[0]))
result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag,
fun=m.fun, jac=m.jac, nfev=sf.nfev, njev=sf.ngev,
nhev=sf.nhev + nhessp[0], nit=k,
message=status_messages[warnflag])
if hess is not None:
result['hess'] = m.hess
if return_all:
result['allvecs'] = allvecs
return result