// Copyright (C) 2010 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #undef DLIB_OPTIMIZATION_TRUST_REGIoN_H_ABSTRACTh_ #ifdef DLIB_OPTIMIZATION_TRUST_REGIoN_H_ABSTRACTh_ #include "../matrix/matrix_abstract.h" namespace dlib { // ---------------------------------------------------------------------------------------- template < typename EXP1, typename EXP2, typename T, long NR, long NC, typename MM, typename L > unsigned long solve_trust_region_subproblem ( const matrix_exp<EXP1>& B, const matrix_exp<EXP2>& g, const typename EXP1::type radius, matrix<T,NR,NC,MM,L>& p, double eps, unsigned long max_iter ); /*! requires - B == trans(B) (i.e. B should be a symmetric matrix) - B.nr() == B.nc() - is_col_vector(g) == true - g.size() == B.nr() - p is capable of containing a column vector the size of g (i.e. p = g; should be a legal expression) - radius > 0 - eps > 0 - max_iter > 0 ensures - This function solves the following optimization problem: Minimize: f(p) == 0.5*trans(p)*B*p + trans(g)*p subject to the following constraint: - length(p) <= radius - returns the number of iterations performed. If this method fails to converge to eps accuracy then the number returned will be max_iter+1. - if (this function didn't terminate due to hitting the max_iter iteration limit) then - if this function returns 0 or 1 then we are not hitting the radius bound Otherwise, the radius constraint is active and std::abs(length(#p)-radius)/radius <= eps. !*/ // ---------------------------------------------------------------------------------------- template < typename EXP1, typename EXP2, typename T, long NR, long NC, typename MM, typename L, typename EXP3 > void solve_trust_region_subproblem_bounded ( const matrix_exp<EXP1>& B, const matrix_exp<EXP2>& g, const typename EXP1::type radius, matrix<T,NR,NC,MM,L>& p, double eps, unsigned long max_iter, const matrix_exp<EXP3>& lower, const matrix_exp<EXP3>& upper ); /*! requires - B == trans(B) (i.e. B should be a symmetric matrix) - B.nr() == B.nc() - is_col_vector(g) == true - is_col_vector(lower) == true - is_col_vector(upper) == true - g.size() == B.nr() - lower.size() == B.nr() - upper.size() == B.nr() - p is capable of containing a column vector the size of g (i.e. p = g; should be a legal expression) - radius > 0 - eps > 0 - max_iter > 0 - min(upper-lower) >= 0 - length(clamp(zeros_matrix(lower),lower,upper)) <= radius (i.e. the lower and upper bounds can't exclude all points with the desired radius.) ensures - This function solves the following optimization problem: Minimize: f(p) == 0.5*trans(p)*B*p + trans(g)*p subject to the following constraints: - length(p) <= radius - lower(i) <= p(i) <= upper(i), for all i - Solves the problem to eps accuracy. We do this by greedily finding the most violated bound constraint, locking that variable to its constrained value, removing it from the problem, and then resolving. We do that until no more constraint violations are present. Each time we just call solve_trust_region_subproblem() to get the solution and pass eps and max_iter directly to these calls to solve_trust_region_subproblem(). !*/ // ---------------------------------------------------------------------------------------- class function_model { /*! WHAT THIS OBJECT REPRESENTS This object defines the interface for a function model used by the trust-region optimizers defined below. In particular, this object represents a function f() and its associated derivative and hessian. !*/ public: // Define the type used to represent column vectors typedef matrix<double,0,1> column_vector; // Define the type used to represent the hessian matrix typedef matrix<double> general_matrix; double operator() ( const column_vector& x ) const; /*! ensures - returns f(x) (i.e. evaluates this model at the given point and returns the value) !*/ void get_derivative_and_hessian ( const column_vector& x, column_vector& d, general_matrix& h ) const; /*! ensures - #d == the derivative of f() at x - #h == the hessian matrix of f() at x - is_col_vector(#d) == true - #d.size() == x.size() - #h.nr() == #h.nc() == x.size() - #h == trans(#h) !*/ }; // ---------------------------------------------------------------------------------------- template < typename stop_strategy_type, typename funct_model > double find_min_trust_region ( stop_strategy_type stop_strategy, const funct_model& model, typename funct_model::column_vector& x, double radius = 1 ); /*! requires - stop_strategy == an object that defines a stop strategy such as one of the objects from dlib/optimization/optimization_stop_strategies_abstract.h - is_col_vector(x) == true - radius > 0 - model must be an object with an interface as defined by the function_model example object shown above. ensures - Performs an unconstrained minimization of the function defined by model starting from the initial point x. This function uses a trust region algorithm to perform the minimization. The radius parameter defines the initial size of the trust region. - The function is optimized until stop_strategy decides that an acceptable point has been found or the trust region subproblem fails to make progress. - #x == the value of x that was found to minimize model() - returns model(#x). - When this function makes calls to model.get_derivative_and_hessian() it always does so by first calling model() and then calling model.get_derivative_and_hessian(). That is, any call to model.get_derivative_and_hessian(val) will always be preceded by a call to model(val) with the same value. This way you can reuse any redundant computations performed by model() and model.get_derivative_and_hessian() as appropriate. !*/ // ---------------------------------------------------------------------------------------- template < typename stop_strategy_type, typename funct_model > double find_max_trust_region ( stop_strategy_type stop_strategy, const funct_model& model, typename funct_model::column_vector& x, double radius = 1 ); /*! requires - stop_strategy == an object that defines a stop strategy such as one of the objects from dlib/optimization/optimization_stop_strategies_abstract.h - is_col_vector(x) == true - radius > 0 - model must be an object with an interface as defined by the function_model example object shown above. ensures - Performs an unconstrained maximization of the function defined by model starting from the initial point x. This function uses a trust region algorithm to perform the maximization. The radius parameter defines the initial size of the trust region. - The function is optimized until stop_strategy decides that an acceptable point has been found or the trust region subproblem fails to make progress. - #x == the value of x that was found to maximize model() - returns model(#x). - When this function makes calls to model.get_derivative_and_hessian() it always does so by first calling model() and then calling model.get_derivative_and_hessian(). That is, any call to model.get_derivative_and_hessian(val) will always be preceded by a call to model(val) with the same value. This way you can reuse any redundant computations performed by model() and model.get_derivative_and_hessian() as appropriate. - Note that this function solves the maximization problem by converting it into a minimization problem. Therefore, the values of model() and its derivative reported to the stopping strategy will be negated. That is, stop_strategy will see -model() and -derivative. All this really means is that the status messages from a stopping strategy in verbose mode will display a negated objective value. !*/ // ---------------------------------------------------------------------------------------- } #endif // DLIB_OPTIMIZATION_TRUST_REGIoN_H_ABSTRACTh_