// Copyright (C) 2017 Davis E. King ([email protected]) | |
// License: Boost Software License See LICENSE.txt for the full license. | |
namespace dlib | |
{ | |
// ---------------------------------------------------------------------------------------- | |
struct function_spec | |
{ | |
/*! | |
WHAT THIS OBJECT REPRESENTS | |
This object is a simple struct that lets you define the valid inputs to a | |
multivariate function. It lets you define bound constraints for each | |
variable as well as say if a variable is integer valued or not. Therefore, | |
an instance of this struct says that a function takes upper.size() input | |
variables, where the ith variable must be in the range [lower(i), upper(i)]
and be an integer if is_integer_variable[i]==true. | |
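For instance, a spec for a function of two variables, where the first is a
real value in [-3,3] and the second is an integer in [1,10], could be
written like this (a usage sketch, not a declaration from this file):
    function_spec spec({-3, 1}, {3, 10}, {false, true});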
!*/ | |
function_spec( | |
matrix<double,0,1> bound1, | |
matrix<double,0,1> bound2 | |
); | |
/*! | |
requires | |
- bound1.size() == bound2.size() | |
- for all valid i: bound1(i) != bound2(i) | |
ensures | |
- #is_integer_variable.size() == bound1.size() | |
- #lower.size() == bound1.size() | |
- #upper.size() == bound1.size() | |
- for all valid i: | |
- #is_integer_variable[i] == false | |
- #lower(i) == min(bound1(i), bound2(i)) | |
- #upper(i) == max(bound1(i), bound2(i)) | |
!*/ | |
function_spec( | |
matrix<double,0,1> bound1,
matrix<double,0,1> bound2,
std::vector<bool> is_integer | |
); | |
/*! | |
requires | |
- bound1.size() == bound2.size() == is_integer.size() | |
- for all valid i: bound1(i) != bound2(i) | |
ensures | |
- #is_integer_variable.size() == bound1.size() | |
- #lower.size() == bound1.size() | |
- #upper.size() == bound1.size() | |
- for all valid i: | |
- #is_integer_variable[i] == is_integer[i] | |
- #lower(i) == min(bound1(i), bound2(i)) | |
- #upper(i) == max(bound1(i), bound2(i)) | |
!*/ | |
matrix<double,0,1> lower; | |
matrix<double,0,1> upper; | |
std::vector<bool> is_integer_variable; | |
}; | |
// ---------------------------------------------------------------------------------------- | |
class function_evaluation_request | |
{ | |
/*! | |
WHAT THIS OBJECT REPRESENTS | |
This object represents a request, by the global_function_search object, to | |
evaluate a real-valued function and report back the results. | |
THREAD SAFETY | |
You shouldn't let more than one thread touch a function_evaluation_request | |
at the same time. However, it is safe to send instances of this class to | |
other threads for processing. This lets you evaluate multiple | |
function_evaluation_requests in parallel. Any appropriate synchronization | |
with regard to the originating global_function_search instance is handled | |
automatically. | |
!*/ | |
public: | |
// You can't make or copy this object, the only way to get one is from the | |
// global_function_search class via get_next_x(). | |
function_evaluation_request() = delete; | |
function_evaluation_request(const function_evaluation_request&) = delete; | |
function_evaluation_request& operator=(const function_evaluation_request&) = delete; | |
// You can however move and swap this object. | |
function_evaluation_request(function_evaluation_request&& item); | |
function_evaluation_request& operator=(function_evaluation_request&& item); | |
/*! | |
ensures | |
- *this takes the state of item. | |
- #item.has_been_evaluated() == true | |
!*/ | |
~function_evaluation_request( | |
); | |
/*! | |
ensures | |
- frees all resources associated with this object. | |
- It's fine to destruct function_evaluation_requests even if they haven't | |
been evaluated yet. If this happens it will simply be as if the request | |
was never issued. | |
!*/ | |
size_t function_idx ( | |
) const; | |
/*! | |
ensures | |
- Returns the function index that identifies which function is to be | |
evaluated. | |
!*/ | |
const matrix<double,0,1>& x ( | |
) const; | |
/*! | |
ensures | |
- returns the input parameters to the function to be evaluated. | |
!*/ | |
bool has_been_evaluated ( | |
) const; | |
/*! | |
ensures | |
- If this evaluation request is still outstanding then returns false, | |
otherwise returns true. That is, if the global_function_search is still | |
waiting for you to report back by calling set() then
has_been_evaluated()==false. | |
!*/ | |
void set ( | |
double y | |
); | |
/*! | |
requires | |
- has_been_evaluated() == false | |
ensures | |
- #has_been_evaluated() == true | |
- Notifies the global_function_search instance that created this object | |
that when the function_idx()th function is evaluated with x() as input | |
then the output is y. | |
!*/ | |
void swap( | |
function_evaluation_request& item | |
); | |
/*! | |
ensures | |
- swaps the state of *this and item | |
!*/ | |
}; | |
// ---------------------------------------------------------------------------------------- | |
class global_function_search | |
{ | |
/*! | |
WHAT THIS OBJECT REPRESENTS | |
This object performs global optimization of a set of user supplied | |
functions. The goal is to maximize the following objective function: | |
max_{function_i,x_i}: function_i(x_i) | |
subject to bound constraints on each element of x_i. Moreover, each | |
element of x_i can be either real valued or integer valued. Each of the | |
functions can also take a different number of variables. Therefore, the | |
final result of the optimization tells you which function produced the | |
largest output and what input (i.e. the x value) to that function is | |
necessary to obtain that maximal value. | |
Importantly, the global_function_search object does not require the user to | |
supply derivatives. Moreover, the functions may contain discontinuities, | |
behave stochastically, and have many local maxima. The global_function_search | |
object will attempt to find the global optimum in the face of these challenges.
It is also designed to use as few function evaluations as possible, making | |
it suitable for optimizing functions that are very expensive to evaluate. | |
It does this by alternating between two modes. A global exploration mode | |
and a local optima refinement mode. This is accomplished by building and | |
maintaining two models of the objective function: | |
1. A global model that upper bounds our objective function. This is a | |
non-parametric piecewise linear model based on all function | |
evaluations ever seen by the global_function_search object. | |
2. A local quadratic model fit around the best point seen so far. | |
The optimization procedure therefore looks like this: | |
while(not done) | |
{ | |
DO GLOBAL EXPLORE STEP: | |
Find the point that maximizes the upper bounding model since | |
that is the point with the largest possible improvement in the | |
objective function. | |
Evaluate the new point and incorporate it into our models. | |
DO LOCAL REFINEMENT STEP: | |
Find the optimal solution to the local quadratic model. | |
If this point looks like it will improve on the "best point seen | |
so far" by at least get_solver_epsilon() then we evaluate that | |
point and incorporate it into our models, otherwise we ignore | |
it. | |
} | |
You can see that we alternate between global search and local refinement, | |
except in the case where the local model seems to have converged to within | |
get_solver_epsilon() accuracy. In that case only global search steps are | |
used. We do this in the hope that the global search will find a new and
better local optimum to explore, which would then reactivate local
refinement when it has something productive to do.
Now let's turn our attention to the specific API defined by the | |
global_function_search object. We will begin by showing a short example of | |
its use: | |
// Suppose we want to find which of these functions, F() and G(), have | |
// the largest output and what input is necessary to produce the | |
// maximal output. | |
auto F = [](double a, double b) { return -std::pow(a-2,2.0) - std::pow(b-4,2.0); }; | |
auto G = [](double x) { return 2-std::pow(x-5,2.0); }; | |
// We first define function_spec objects that specify bounds on the | |
// inputs to each function. The search process will only search within | |
// these bounds. | |
function_spec spec_F({-10,-10}, {10,10}); | |
function_spec spec_G({-2}, {6}); | |
// Then we create a global_function_search object with those function specifications. | |
global_function_search opt({spec_F, spec_G}); | |
// Here we run 15 iterations of the search process. Note that the user | |
// of global_function_search writes the main solver loop, which is | |
// somewhat unusual. We will discuss why that is in a moment, but for | |
// now let's look at this example. | |
for (int i = 0; i < 15; ++i) | |
{ | |
// All we do here is ask the global_function_search object what to | |
// evaluate next, then do what it asked, and then report the | |
// results back by calling function_evaluation_request's set() | |
// method. | |
function_evaluation_request next = opt.get_next_x(); | |
// next.function_idx() tells you which of the functions you should | |
// evaluate. We have 2 functions here (F and G) so function_idx() | |
// can take only the values 0 and 1. If, for example, we had 10 | |
// functions it would take the values 0 through 9. | |
if (next.function_idx() == 0) | |
{ | |
// Call F with the inputs requested by the | |
// global_function_search and report them back. | |
double a = next.x()(0); | |
double b = next.x()(1); | |
next.set(F(a,b)); // Tell the solver what happened. | |
} | |
else | |
{ | |
double x = next.x()(0); | |
next.set(G(x)); | |
} | |
} | |
// Find out what point gave the largest outputs: | |
matrix<double,0,1> x; | |
double y; | |
size_t function_idx; | |
opt.get_best_function_eval(x,y,function_idx); | |
cout << "function_idx: "<< function_idx << endl; | |
cout << "y: " << y << endl; | |
cout << "x: " << x << endl; | |
The above cout statements will print this: | |
function_idx: 1 | |
y: 2 | |
x: 5 | |
This is the correct result, since G(5) gives the largest possible output in
our example.
So why does the user write the main loop? Why isn't it embedded inside | |
dlib? Well, there are two answers to this. The first is that it is. Most | |
users should just call dlib::find_max_global() which does exactly that, it | |
runs the loop for you. However, the API shown above gives you the | |
opportunity to run multiple function evaluations in parallel. For | |
instance, it is perfectly valid to call get_next_x() multiple times and | |
send the resulting function_evaluation_request objects to separate threads | |
for processing. Those separate threads can run the functions being | |
optimized (e.g. F and G or whatever) and report back by calling | |
function_evaluation_request::set(). You could even spread the work across | |
a compute cluster if you have one. Note that find_max_global() optionally | |
supports this type of parallel execution, however you get more flexibility | |
with the global_function_search's API. As another example, it's possible | |
to save the state of the solver to disk so you can restart the optimization | |
from that point at a later date when using global_function_search, but not | |
with find_max_global(). | |
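To make this concrete, here is a minimal sketch of that kind of parallel
usage. It is illustrative only; it reuses the F, G, and opt objects from
the example above and assumes <thread> and <vector> are available:
    // A worker that evaluates whichever function a request refers to and
    // reports the result back to the solver.
    auto evaluate = [&](function_evaluation_request next)
    {
        if (next.function_idx() == 0)
            next.set(F(next.x()(0), next.x()(1)));
        else
            next.set(G(next.x()(0)));
    };
    // Issue several requests at once and process each in its own thread.
    std::vector<std::thread> workers;
    for (int i = 0; i < 4; ++i)
        workers.emplace_back(evaluate, opt.get_next_x());
    for (auto& w : workers)
        w.join();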
So what happens if you have N outstanding function evaluation requests? | |
Or in other words, what happens if you called get_next_x() N times and | |
haven't yet called their set() methods? Well, 1 of the N requests will be | |
a local refinement step while the N-1 other requests will be global | |
exploration steps generated from the current upper bounding model. This | |
should give you an idea of the usefulness of this kind of parallelism. If | |
for example, your functions being optimized were simple convex functions | |
this kind of parallelism wouldn't help since essentially all the | |
interesting work in the solver is going to be done by the local optimizer. | |
However, if your function has a lot of local optima, running many global | |
exploration steps in parallel might significantly reduce the time it takes | |
to find a good solution. | |
It should also be noted that our upper bounding model is implemented by the | |
dlib::upper_bound_function object, which is a tool that allows us to create | |
a tight upper bound on our objective function. This upper bound is | |
non-parametric and gets progressively more accurate as the optimization | |
progresses, but also more and more expensive to maintain. It causes the | |
runtime of the entire optimization procedure to be O(N^2) where N is the | |
number of objective function evaluations. So problems that require millions | |
of function evaluations to find a good solution are not appropriate for the | |
global_function_search tool. However, if your objective function is very | |
expensive to evaluate then this relatively expensive upper bounding model | |
is well worth its computational cost. | |
Finally, let's introduce some background literature on this algorithm. The | |
two most relevant papers in the optimization literature are: | |
Global optimization of Lipschitz functions, by Malherbe, Cédric and Vayatis,
Nicolas, International Conference on Machine Learning, 2017
and
The NEWUOA software for unconstrained optimization without derivatives, by
M.J.D. Powell, 40th Workshop on Large Scale Nonlinear Optimization (Erice,
Italy, 2004)
Our upper bounding model is an extension of the AdaLIPO method described in
the Malherbe paper. See the documentation of dlib::upper_bound_function for
more details on that, as we make a number of important extensions. The other
part of our method, our local refinement model, is essentially the same | |
type of trust region model proposed by Powell in the above paper. That is, | |
each time we do a local refinement step we identify the best point seen so | |
far, fit a quadratic function around it using the function evaluations we | |
have collected so far, and then use a simple trust region procedure to | |
decide the next best point to evaluate based on our quadratic model. | |
The method proposed by Malherbe gives excellent global search performance
but has terrible convergence properties in the area around a maximum.
Powell's method on the other hand has excellent convergence in the area
around a local maximum, as expected of a quadratic trust region method, but
it is aggressively local maximum seeking. It will simply get stuck in the
nearest local optimum. Combining the two together as we do here gives us
excellent performance in both global search and final convergence speed
near a local optimum, allowing the global_function_search to perform well
for functions with many local optima while still giving high precision
solutions. For instance, on typical test problems, like the Holder table
function, the global_function_search object can reliably find the globally
optimal solution to full floating point precision in under a few hundred
steps.
THREAD SAFETY | |
You shouldn't let more than one thread touch a global_function_search | |
instance at the same time. | |
!*/ | |
public: | |
global_function_search( | |
); | |
/*! | |
ensures | |
- #num_functions() == 0 | |
- #get_relative_noise_magnitude() == 0.001 | |
- #get_solver_epsilon() == 0 | |
- #get_monte_carlo_upper_bound_sample_num() == 5000 | |
- #get_pure_random_search_probability() == 0.02 | |
!*/ | |
explicit global_function_search( | |
const function_spec& function | |
); | |
/*! | |
ensures | |
- #num_functions() == 1 | |
- #get_function_evaluations() will indicate that there are no function evaluations yet. | |
- #get_relative_noise_magnitude() == 0.001 | |
- #get_solver_epsilon() == 0 | |
- #get_monte_carlo_upper_bound_sample_num() == 5000 | |
- #get_pure_random_search_probability() == 0.02 | |
!*/ | |
explicit global_function_search( | |
const std::vector<function_spec>& functions | |
); | |
/*! | |
ensures | |
- #num_functions() == functions.size() | |
- #get_function_evaluations() will indicate that there are no function evaluations yet. | |
- #get_relative_noise_magnitude() == 0.001 | |
- #get_solver_epsilon() == 0 | |
- #get_monte_carlo_upper_bound_sample_num() == 5000 | |
- #get_pure_random_search_probability() == 0.02 | |
!*/ | |
global_function_search( | |
const std::vector<function_spec>& functions, | |
const std::vector<std::vector<function_evaluation>>& initial_function_evals, | |
const double relative_noise_magnitude = 0.001 | |
); | |
/*! | |
requires | |
- functions.size() == initial_function_evals.size() | |
- relative_noise_magnitude >= 0 | |
ensures | |
- #num_functions() == functions.size() | |
- #get_function_evaluations() will return the provided initial_function_evals. | |
- #get_relative_noise_magnitude() == relative_noise_magnitude | |
- #get_solver_epsilon() == 0 | |
- #get_monte_carlo_upper_bound_sample_num() == 5000 | |
- #get_pure_random_search_probability() == 0.02 | |
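For example, given some existing global_function_search object opt, one way
to checkpoint its state and later resume from it is the following sketch
(it simply holds the state in memory, though the same vectors could also
be written to disk):
    std::vector<function_spec> specs;
    std::vector<std::vector<function_evaluation>> evals;
    opt.get_function_evaluations(specs, evals);
    // ... later, construct a new solver that picks up where opt left off.
    global_function_search opt2(specs, evals);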
!*/ | |
// This object can't be copied. | |
global_function_search(const global_function_search&) = delete; | |
global_function_search& operator=(const global_function_search& item) = delete; | |
// But it can be moved | |
global_function_search(global_function_search&& item) = default; | |
global_function_search& operator=(global_function_search&& item) = default; | |
/*! | |
ensures | |
- moves the state of item into *this | |
- #item.num_functions() == 0 | |
!*/ | |
void set_seed ( | |
time_t seed | |
); | |
/*! | |
ensures | |
- Part of this object's algorithm uses random sampling to decide what | |
points to evaluate next. Calling set_seed() lets you set the seed used | |
by the random number generator. Note that if you don't call set_seed() | |
you will always get the same deterministic behavior. | |
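For example, given a global_function_search object opt, you could seed with
the current time to get different search behavior from run to run (a
sketch; assumes <ctime> is included):
    opt.set_seed(std::time(0));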
!*/ | |
size_t num_functions( | |
) const; | |
/*! | |
ensures | |
- returns the number of functions being optimized. | |
!*/ | |
void get_function_evaluations ( | |
std::vector<function_spec>& specs, | |
std::vector<std::vector<function_evaluation>>& function_evals | |
) const; | |
/*! | |
ensures | |
- #specs.size() == num_functions() | |
- #function_evals.size() == num_functions() | |
- This function allows you to query the state of the solver. In | |
particular, you can find the function_specs for each function being | |
optimized and their recorded evaluations. | |
- for all valid i: | |
- function_evals[i] == all the function evaluations that have been | |
recorded for the ith function (i.e. the function with the | |
function_spec #specs[i]). That is, this is the record of all the x | |
and y values reported back by function_evaluation_request::set() | |
calls. | |
!*/ | |
void get_best_function_eval ( | |
matrix<double,0,1>& x, | |
double& y, | |
size_t& function_idx | |
) const; | |
/*! | |
requires | |
- num_functions() != 0 | |
ensures | |
- if (no function evaluations have been recorded yet) then | |
- The outputs of this function are in a valid but undefined state. | |
- else | |
- This function tells you which function has produced the largest
output seen so far. It also tells you the inputs to that function
that lead to that output (x) as well as the output value itself (y).
- 0 <= #function_idx < num_functions() | |
- #function_idx == the index of the function that produced the largest output seen so far. | |
- #x == the input parameters to the function that produced the largest output seen so far.
- #y == the largest output seen so far. | |
!*/ | |
function_evaluation_request get_next_x ( | |
); | |
/*! | |
requires | |
- num_functions() != 0 | |
ensures | |
- Generates and returns a function evaluation request. See the discussion | |
in the WHAT THIS OBJECT REPRESENTS section above for details. | |
!*/ | |
double get_pure_random_search_probability ( | |
) const; | |
/*! | |
ensures | |
- When we decide to do a global explore step we will, with probability | |
get_pure_random_search_probability(), sample a point completely at random | |
rather than using the upper bounding model. Therefore, if you set this | |
probability to 0 then we will depend entirely on the upper bounding | |
model. Alternatively, if you set get_pure_random_search_probability() to | |
1 then we won't use the upper bounding model at all and instead use pure | |
random search to do global exploration. Pure random search is much | |
faster than using the upper bounding model, so if you know that your | |
objective function is especially simple it can be faster to use pure | |
random search. However, if you really know your function that well you | |
should probably use a gradient based optimizer :) | |
!*/ | |
void set_pure_random_search_probability ( | |
double prob | |
); | |
/*! | |
requires | |
- prob >= 0 | |
ensures | |
- #get_pure_random_search_probability() == prob | |
!*/ | |
double get_solver_epsilon ( | |
) const; | |
/*! | |
ensures | |
- As discussed in the WHAT THIS OBJECT REPRESENTS section, we only do a | |
local refinement step if we haven't already found the peak of the current
local optimum. get_solver_epsilon() sets the tolerance for deciding if
the local search method has found the local optimum. Therefore, when the
local trust region model runs we check if its predicted improvement in | |
the objective function is greater than get_solver_epsilon(). If it isn't | |
then we assume it has converged and we should focus entirely on global | |
search. | |
This means that, for instance, setting get_solver_epsilon() to 0 | |
essentially instructs the solver to find each local optima to full | |
floating point precision and only then to focus on pure global search. | |
!*/ | |
void set_solver_epsilon ( | |
double eps | |
); | |
/*! | |
requires | |
- eps >= 0 | |
ensures | |
- #get_solver_epsilon() == eps | |
!*/ | |
double get_relative_noise_magnitude ( | |
) const; | |
/*! | |
ensures | |
- Returns the value of the relative noise magnitude parameter to the | |
dlib::upper_bound_function's used by this object. See the | |
upper_bound_function's documentation for a detailed discussion of this | |
parameter's meaning. Most users should leave this value as its default | |
setting. | |
!*/ | |
void set_relative_noise_magnitude ( | |
double value | |
); | |
/*! | |
requires | |
- value >= 0 | |
ensures | |
- #get_relative_noise_magnitude() == value | |
!*/ | |
size_t get_monte_carlo_upper_bound_sample_num ( | |
) const; | |
/*! | |
ensures | |
- To find the point that maximizes the upper bounding model we use | |
get_monte_carlo_upper_bound_sample_num() random evaluations and select | |
the largest upper bound from that set. So this parameter influences how | |
well we estimate the maximum point on the upper bounding model. | |
!*/ | |
void set_monte_carlo_upper_bound_sample_num ( | |
size_t num | |
); | |
/*! | |
requires | |
- num > 0 | |
ensures | |
- #get_monte_carlo_upper_bound_sample_num() == num | |
!*/ | |
}; | |
// ---------------------------------------------------------------------------------------- | |
} | |