#include "global_function_search.h" #include "upper_bound_function.h" #include "../optimization.h" namespace dlib { // ---------------------------------------------------------------------------------------- namespace qopt_impl { void fit_quadratic_to_points_mse( const matrix& X, const matrix& Y, matrix& H, matrix& g, double& c ) { DLIB_CASSERT(X.size() > 0); DLIB_CASSERT(X.nc() == Y.size()); DLIB_CASSERT(X.nc() >= (X.nr()+1)*(X.nr()+2)/2); const long dims = X.nr(); const long M = X.nc(); matrix W((X.nr()+1)*(X.nr()+2)/2, M); set_subm(W, 0,0, dims, M) = X; set_subm(W, dims,0, 1, M) = 1; for (long c = 0; c < X.nc(); ++c) { long wr = dims+1; for (long r = 0; r < X.nr(); ++r) { for (long r2 = r; r2 < X.nr(); ++r2) { W(wr,c) = X(r,c)*X(r2,c); if (r2 == r) W(wr,c) *= 0.5; ++wr; } } } matrix z = pinv(trans(W))*Y; c = z(dims); g = rowm(z, range(0,dims-1)); H.set_size(dims,dims); long wr = dims+1; for (long r = 0; r < X.nr(); ++r) { for (long r2 = r; r2 < X.nr(); ++r2) { H(r,r2) = H(r2,r) = z(wr++); } } } // ---------------------------------------------------------------------------------------- void fit_quadratic_to_points( const matrix& X, const matrix& Y, matrix& H, matrix& g, double& c ) /*! requires - X.size() > 0 - X.nc() == Y.size() - X.nr()+1 <= X.nc() ensures - This function finds a quadratic function, Q(x), that interpolates the given set of points. If there aren't enough points to uniquely define Q(x) then the Q(x) that fits the given points with the minimum Frobenius norm hessian matrix is selected. - To be precise: - Let: Q(x) == 0.5*trans(x)*H*x + trans(x)*g + c - Then this function finds H, g, and c that minimizes the following: sum(squared(H)) such that: Q(colm(X,i)) == Y(i), for all valid i - If there are more points than necessary to constrain Q then the Q that best interpolates the function in the mean squared sense is found. !*/ { DLIB_CASSERT(X.size() > 0); DLIB_CASSERT(X.nc() == Y.size()); DLIB_CASSERT(X.nr()+1 <= X.nc()); if (X.nc() >= (X.nr()+1)*(X.nr()+2)/2) { fit_quadratic_to_points_mse(X,Y,H,g,c); return; } const long dims = X.nr(); const long M = X.nc(); /* Our implementation uses the equations 3.9 - 3.12 from the paper: The NEWUOA software for unconstrained optimization without derivatives By M.J.D. Powell, 40th Workshop on Large Scale Nonlinear Optimization (Erice, Italy, 2004) */ matrix W(M + dims + 1, M + dims + 1); set_subm(W, 0, 0, M, M) = 0.5*squared(tmp(trans(X)*X)); set_subm(W, 0, M, M, 1) = 1; set_subm(W, M, 0, 1, M) = 1; set_subm(W, M, M, dims+1, dims+1) = 0; set_subm(W, 0, M+1, X.nc(), X.nr()) = trans(X); set_subm(W, M+1, 0, X.nr(), X.nc()) = X; const matrix r = join_cols(Y, zeros_matrix(dims+1,1)); //matrix z = pinv(W)*r; lu_decomposition lu(W); matrix z = lu.solve(r); //if (lu.is_singular()) std::cout << "WARNING, THE W MATRIX IS SINGULAR!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; matrix lambda = rowm(z, range(0,M-1)); c = z(M); g = rowm(z, range(M+1,z.size()-1)); H = X*diagm(lambda)*trans(X); } // ---------------------------------------------------------------------------------------- struct quad_interp_result { quad_interp_result() = default; template quad_interp_result( const matrix_exp& best_x, double predicted_improvement ) : best_x(best_x), predicted_improvement(predicted_improvement) {} matrix best_x; double predicted_improvement = std::numeric_limits::quiet_NaN(); }; // ---------------------------------------------------------------------------------------- quad_interp_result find_max_quadraticly_interpolated_vector ( const matrix& anchor, const double radius, const std::vector>& x, const std::vector& y, const matrix& lower, const matrix& upper ) { DLIB_CASSERT(x.size() == y.size()); DLIB_CASSERT(x.size() > 0); for (size_t i = 0; i < x.size(); ++i) DLIB_CASSERT(anchor.size() == x[i].size()); const long x_size = static_cast(x.size()); DLIB_CASSERT(anchor.size()+1 <= x_size && x_size <= (anchor.size()+1)*(anchor.size()+2)/2); matrix X(anchor.size(), x.size()); matrix Y(x.size()); for (size_t i = 0; i < x.size(); ++i) { set_colm(X,i) = x[i] - anchor; Y(i) = y[i]; } matrix H; matrix g; double c; fit_quadratic_to_points(X, Y, H, g, c); matrix p; solve_trust_region_subproblem_bounded(-H,-g, radius, p, 0.001, 500, lower-anchor, upper-anchor); // ensure we never move more than radius from the anchor. This might happen if the // trust region subproblem isn't solved accurately enough. if (length(p) >= radius) p *= radius/length(p); double predicted_improvement = 0.5*trans(p)*H*p + trans(p)*g; return quad_interp_result{clamp(anchor+p,lower,upper), predicted_improvement}; } // ---------------------------------------------------------------------------------------- quad_interp_result pick_next_sample_using_trust_region ( const std::vector& samples, double& radius, const matrix& lower, const matrix& upper, const std::vector& is_integer_variable ) { DLIB_CASSERT(samples.size() > 0); // We don't use the QP to optimize integer variables. Instead, we just fix them at // their best observed value and use the QP to optimize the real variables. So the // number of dimensions, as far as the QP is concerned, is the number of non-integer // variables. size_t dims = 0; for (auto is_int : is_integer_variable) { if (!is_int) ++dims; } DLIB_CASSERT(samples.size() >= dims+1); // Use enough points to fill out a quadratic model or the max available if we don't // have quite enough. const long N = std::min(samples.size(), (dims+1)*(dims+2)/2); // first find the best sample; double best_val = -1e300; matrix best_x; for (auto& v : samples) { if (v.y > best_val) { best_val = v.y; best_x = v.x; } } // if there are only integer variables then there isn't really anything to do. So just // return the best_x and say there is no improvement. if (dims == 0) return quad_interp_result(best_x, 0); matrix active_dims(dims); long j = 0; for (size_t i = 0; i < is_integer_variable.size(); ++i) { if (!is_integer_variable[i]) active_dims(j++) = i; } // now find the N-1 nearest neighbors of best_x std::vector> distances; for (size_t i = 0; i < samples.size(); ++i) distances.emplace_back(length(best_x-samples[i].x), i); std::sort(distances.begin(), distances.end()); distances.resize(N); std::vector> x; std::vector y; for (auto& idx : distances) { x.emplace_back(rowm(samples[idx.second].x, active_dims)); y.emplace_back(samples[idx.second].y); } if (radius == 0) { for (auto& idx : distances) radius = std::max(radius, length(rowm(best_x-samples[idx.second].x, active_dims)) ); // Shrink the radius a little so we are always going to be making the sampling of // points near the best current point smaller. radius *= 0.95; } auto tmp = find_max_quadraticly_interpolated_vector(rowm(best_x,active_dims), radius, x, y, rowm(lower,active_dims), rowm(upper,active_dims)); // stick the integer variables back into the solution for (long i = 0; i < active_dims.size(); ++i) best_x(active_dims(i)) = tmp.best_x(i); tmp.best_x = best_x; return tmp; } // ---------------------------------------------------------------------------------------- matrix make_random_vector( dlib::rand& rnd, const matrix& lower, const matrix& upper, const std::vector& is_integer_variable ) { matrix temp(lower.size()); for (long i = 0; i < temp.size(); ++i) { temp(i) = rnd.get_double_in_range(lower(i), upper(i)); if (is_integer_variable[i]) temp(i) = std::round(temp(i)); } return temp; } // ---------------------------------------------------------------------------------------- struct max_upper_bound_function { max_upper_bound_function() = default; template max_upper_bound_function( const matrix_exp& x, double predicted_improvement, double upper_bound ) : x(x), predicted_improvement(predicted_improvement), upper_bound(upper_bound) {} matrix x; double predicted_improvement = 0; double upper_bound = 0; }; // ------------------------------------------------------------------------------------ max_upper_bound_function pick_next_sample_as_max_upper_bound ( dlib::rand& rnd, const upper_bound_function& ub, const matrix& lower, const matrix& upper, const std::vector& is_integer_variable, const size_t num_random_samples ) { DLIB_CASSERT(ub.num_points() > 0); // now do a simple random search to find the maximum upper bound double best_ub_so_far = -std::numeric_limits::infinity(); matrix vtemp(lower.size()), v; for (size_t rounds = 0; rounds < num_random_samples; ++rounds) { vtemp = make_random_vector(rnd, lower, upper, is_integer_variable); double bound = ub(vtemp); if (bound > best_ub_so_far) { best_ub_so_far = bound; v = vtemp; } } double max_value = -std::numeric_limits::infinity(); for (auto& v : ub.get_points()) max_value = std::max(max_value, v.y); return max_upper_bound_function(v, best_ub_so_far - max_value, best_ub_so_far); } } // end of namespace qopt_impl; using namespace qopt_impl; // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- function_spec::function_spec( matrix bound1, matrix bound2 ) : lower(std::move(bound1)), upper(std::move(bound2)) { DLIB_CASSERT(lower.size() == upper.size()); for (long i = 0; i < lower.size(); ++i) { if (upper(i) < lower(i)) std::swap(lower(i), upper(i)); DLIB_CASSERT(upper(i) != lower(i), "The upper and lower bounds can't be equal."); } is_integer_variable.assign(lower.size(), false); } // ---------------------------------------------------------------------------------------- function_spec::function_spec( matrix bound1, matrix bound2, std::vector is_integer ) : function_spec(std::move(bound1),std::move(bound2)) { is_integer_variable = std::move(is_integer); DLIB_CASSERT(lower.size() == (long)is_integer_variable.size()); // Make sure any integer variables have integer bounds. for (size_t i = 0; i < is_integer_variable.size(); ++i) { if (is_integer_variable[i]) { DLIB_CASSERT(std::round(lower(i)) == lower(i), "If you say a variable is an integer variable then it must have an integer lower bound. \n" << "lower[i] = " << lower(i)); DLIB_CASSERT(std::round(upper(i)) == upper(i), "If you say a variable is an integer variable then it must have an integer upper bound. \n" << "upper[i] = " << upper(i)); } } } // ---------------------------------------------------------------------------------------- namespace gopt_impl { upper_bound_function funct_info::build_upper_bound_with_all_function_evals ( ) const { upper_bound_function tmp(ub); // we are going to add the outstanding evals into this and assume the // outstanding evals are going to take y values equal to their nearest // neighbor complete evals. for (auto& eval : outstanding_evals) { function_evaluation e; e.x = eval.x; e.y = find_nn(ub.get_points(), eval.x); tmp.add(e); } return tmp; } // ------------------------------------------------------------------------------------ double funct_info::find_nn ( const std::vector& evals, const matrix& x ) { double best_y = 0; double best_dist = std::numeric_limits::infinity(); for (auto& v : evals) { double dist = length_squared(v.x-x); if (dist < best_dist) { best_dist = dist; best_y = v.y; } } return best_y; } } // end namespace gopt_impl // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- function_evaluation_request::function_evaluation_request( function_evaluation_request&& item ) { m_has_been_evaluated = item.m_has_been_evaluated; req = item.req; info = item.info; item.info.reset(); item.m_has_been_evaluated = true; } // ---------------------------------------------------------------------------------------- function_evaluation_request& function_evaluation_request:: operator=( function_evaluation_request&& item ) { function_evaluation_request(std::move(item)).swap(*this); return *this; } // ---------------------------------------------------------------------------------------- void function_evaluation_request:: swap( function_evaluation_request& item ) { std::swap(m_has_been_evaluated, item.m_has_been_evaluated); std::swap(req, item.req); std::swap(info, item.info); } // ---------------------------------------------------------------------------------------- size_t function_evaluation_request:: function_idx ( ) const { return info->function_idx; } const matrix& function_evaluation_request:: x ( ) const { return req.x; } // ---------------------------------------------------------------------------------------- bool function_evaluation_request:: has_been_evaluated ( ) const { return m_has_been_evaluated; } // ---------------------------------------------------------------------------------------- function_evaluation_request:: ~function_evaluation_request() { if (!m_has_been_evaluated) { std::lock_guard lock(*info->m); // remove the evaluation request from the outstanding list. auto i = std::find(info->outstanding_evals.begin(), info->outstanding_evals.end(), req); info->outstanding_evals.erase(i); } } // ---------------------------------------------------------------------------------------- void function_evaluation_request:: set ( double y ) { DLIB_CASSERT(has_been_evaluated() == false); std::lock_guard lock(*info->m); m_has_been_evaluated = true; // move the evaluation from outstanding to complete auto i = std::find(info->outstanding_evals.begin(), info->outstanding_evals.end(), req); DLIB_CASSERT(i != info->outstanding_evals.end()); info->outstanding_evals.erase(i); info->ub.add(function_evaluation(req.x,y)); // Now do trust region radius maintenance and keep track of the best objective // values and all that. if (req.was_trust_region_generated_request) { // Adjust trust region radius based on how good this evaluation // was. double measured_improvement = y-req.anchor_objective_value; double rho = measured_improvement/std::abs(req.predicted_improvement); //std::cout << "rho: "<< rho << std::endl; //std::cout << "radius: "<< info->radius << std::endl; if (rho < 0.25) info->radius *= 0.5; else if (rho > 0.75) info->radius *= 2; } if (y > info->best_objective_value) { if (!req.was_trust_region_generated_request && length(req.x - info->best_x) > info->radius*1.001) { //std::cout << "reset radius because of big move, " << length(req.x - info->best_x) << " radius was " << info->radius << std::endl; // reset trust region radius since we made a big move. Doing this will // cause the radius to be reset to the size of the local region. info->radius = 0; } info->best_objective_value = y; info->best_x = req.x; } } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- global_function_search:: global_function_search( const function_spec& function ) : global_function_search(std::vector(1,function)) {} // ---------------------------------------------------------------------------------------- global_function_search:: global_function_search( const std::vector& functions_ ) { DLIB_CASSERT(functions_.size() > 0); m = std::make_shared(); functions.reserve(functions_.size()); for (size_t i = 0; i < functions_.size(); ++i) functions.emplace_back(std::make_shared(functions_[i],i,m)); } // ---------------------------------------------------------------------------------------- global_function_search:: global_function_search( const std::vector& functions_, const std::vector>& initial_function_evals, const double relative_noise_magnitude_ ) : global_function_search(functions_) { DLIB_CASSERT(functions_.size() > 0); DLIB_CASSERT(functions_.size() == initial_function_evals.size()); DLIB_CASSERT(relative_noise_magnitude_ >= 0); relative_noise_magnitude = relative_noise_magnitude_; for (size_t i = 0; i < initial_function_evals.size(); ++i) { functions[i]->ub = upper_bound_function(initial_function_evals[i], relative_noise_magnitude); if (initial_function_evals[i].size() != 0) { auto best = max_scoring_element(initial_function_evals[i], [](const function_evaluation& e) { return e.y; }).first; functions[i]->best_objective_value = best.y; functions[i]->best_x = best.x; } } } // ---------------------------------------------------------------------------------------- size_t global_function_search:: num_functions( ) const { return functions.size(); } // ---------------------------------------------------------------------------------------- void global_function_search:: set_seed ( time_t seed ) { rnd = dlib::rand(seed); } // ---------------------------------------------------------------------------------------- void global_function_search:: get_function_evaluations ( std::vector& specs, std::vector>& function_evals ) const { std::lock_guard lock(*m); specs.clear(); function_evals.clear(); for (size_t i = 0; i < functions.size(); ++i) { specs.emplace_back(functions[i]->spec); function_evals.emplace_back(functions[i]->ub.get_points()); } } // ---------------------------------------------------------------------------------------- void global_function_search:: get_best_function_eval ( matrix& x, double& y, size_t& function_idx ) const { DLIB_CASSERT(num_functions() != 0); std::lock_guard lock(*m); // find the largest value auto& info = *best_function(function_idx); y = info.best_objective_value; x = info.best_x; } // ---------------------------------------------------------------------------------------- function_evaluation_request global_function_search:: get_next_x ( ) { DLIB_CASSERT(num_functions() != 0); using namespace gopt_impl; std::lock_guard lock(*m); // the first thing we do is make sure each function has at least max(3,dimensionality of function) evaluations for (auto& info : functions) { const long dims = info->spec.lower.size(); // If this is the very beginning of the optimization process if (info->ub.num_points()+info->outstanding_evals.size() < 1) { outstanding_function_eval_request new_req; new_req.request_id = next_request_id++; // Pick the point right in the center of the bounds to evaluate first since // people will commonly center the bound on a location they think is good. // So might as well try there first. new_req.x = (info->spec.lower + info->spec.upper)/2.0; for (long i = 0; i < new_req.x.size(); ++i) { if (info->spec.is_integer_variable[i]) new_req.x(i) = std::round(new_req.x(i)); } info->outstanding_evals.emplace_back(new_req); return function_evaluation_request(new_req,info); } else if (info->ub.num_points() < std::max(3,dims)) { outstanding_function_eval_request new_req; new_req.request_id = next_request_id++; new_req.x = make_random_vector(rnd, info->spec.lower, info->spec.upper, info->spec.is_integer_variable); info->outstanding_evals.emplace_back(new_req); return function_evaluation_request(new_req,info); } } if (do_trust_region_step && !has_outstanding_trust_region_request()) { // find the currently best performing function, we will do a trust region // step on it. auto info = best_function(); const long dims = info->spec.lower.size(); // if we have enough points to do a trust region step if (info->ub.num_points() > dims+1) { auto tmp = pick_next_sample_using_trust_region(info->ub.get_points(), info->radius, info->spec.lower, info->spec.upper, info->spec.is_integer_variable); //std::cout << "QP predicted improvement: "<< tmp.predicted_improvement << std::endl; if (tmp.predicted_improvement > min_trust_region_epsilon) { do_trust_region_step = false; outstanding_function_eval_request new_req; new_req.request_id = next_request_id++; new_req.x = tmp.best_x; new_req.was_trust_region_generated_request = true; new_req.anchor_objective_value = info->best_objective_value; new_req.predicted_improvement = tmp.predicted_improvement; info->outstanding_evals.emplace_back(new_req); return function_evaluation_request(new_req, info); } } } // make it so we alternate between upper bounded and trust region steps. do_trust_region_step = true; if (rnd.get_random_double() >= pure_random_search_probability) { // pick a point at random to sample according to the upper bound double best_upper_bound = -std::numeric_limits::infinity(); std::shared_ptr best_funct; matrix next_sample; // so figure out if any function has a good upper bound and if so pick the // function with the largest upper bound for evaluation. for (auto& info : functions) { auto tmp = pick_next_sample_as_max_upper_bound(rnd, info->build_upper_bound_with_all_function_evals(), info->spec.lower, info->spec.upper, info->spec.is_integer_variable, num_random_samples); if (tmp.predicted_improvement > 0 && tmp.upper_bound > best_upper_bound) { best_upper_bound = tmp.upper_bound; next_sample = std::move(tmp.x); best_funct = info; } } // if we found a good function to evaluate then return that. if (best_funct) { outstanding_function_eval_request new_req; new_req.request_id = next_request_id++; new_req.x = std::move(next_sample); best_funct->outstanding_evals.emplace_back(new_req); return function_evaluation_request(new_req, best_funct); } } // pick entirely at random size_t function_idx = rnd.get_integer(functions.size()); auto info = functions[function_idx]; outstanding_function_eval_request new_req; new_req.request_id = next_request_id++; new_req.x = make_random_vector(rnd, info->spec.lower, info->spec.upper, info->spec.is_integer_variable); info->outstanding_evals.emplace_back(new_req); return function_evaluation_request(new_req, info); } // ---------------------------------------------------------------------------------------- double global_function_search:: get_pure_random_search_probability ( ) const { return pure_random_search_probability; } // ---------------------------------------------------------------------------------------- void global_function_search:: set_pure_random_search_probability ( double prob ) { DLIB_CASSERT(0 <= prob && prob <= 1); pure_random_search_probability = prob; } // ---------------------------------------------------------------------------------------- double global_function_search:: get_solver_epsilon ( ) const { return min_trust_region_epsilon; } // ---------------------------------------------------------------------------------------- void global_function_search:: set_solver_epsilon ( double eps ) { DLIB_CASSERT(0 <= eps); min_trust_region_epsilon = eps; } // ---------------------------------------------------------------------------------------- double global_function_search:: get_relative_noise_magnitude ( ) const { return relative_noise_magnitude; } // ---------------------------------------------------------------------------------------- void global_function_search:: set_relative_noise_magnitude ( double value ) { DLIB_CASSERT(0 <= value); relative_noise_magnitude = value; if (m) { std::lock_guard lock(*m); // recreate all the upper bound functions with the new relative noise magnitude for (auto& f : functions) f->ub = upper_bound_function(f->ub.get_points(), relative_noise_magnitude); } } // ---------------------------------------------------------------------------------------- size_t global_function_search:: get_monte_carlo_upper_bound_sample_num ( ) const { return num_random_samples; } // ---------------------------------------------------------------------------------------- void global_function_search:: set_monte_carlo_upper_bound_sample_num ( size_t num ) { num_random_samples = num; } // ---------------------------------------------------------------------------------------- std::shared_ptr global_function_search:: best_function( ) const { size_t idx = 0; return best_function(idx); } // ---------------------------------------------------------------------------------------- std::shared_ptr global_function_search:: best_function( size_t& idx ) const { auto compare = [](const std::shared_ptr& a, const std::shared_ptr& b) { return a->best_objective_value < b->best_objective_value; }; auto i = std::max_element(functions.begin(), functions.end(), compare); idx = std::distance(functions.begin(),i); return *i; } // ---------------------------------------------------------------------------------------- bool global_function_search:: has_outstanding_trust_region_request ( ) const { for (auto& f : functions) { for (auto& i : f->outstanding_evals) { if (i.was_trust_region_generated_request) return true; } } return false; } // ---------------------------------------------------------------------------------------- }