|
|
|
|
|
#ifndef DLIB_UPPER_bOUND_FUNCTION_Hh_ |
|
#define DLIB_UPPER_bOUND_FUNCTION_Hh_ |
|
|
|
#include "upper_bound_function_abstract.h" |
|
#include "../svm/svm_c_linear_dcd_trainer.h" |
|
#include "../statistics.h" |
|
#include <limits> |
|
#include <utility> |
|
|
|
namespace dlib |
|
{ |
|
|
|
|
|
|
|
struct function_evaluation |
|
{ |
|
function_evaluation() = default; |
|
function_evaluation(const matrix<double,0,1>& x, double y) :x(x), y(y) {} |
|
|
|
matrix<double,0,1> x; |
|
double y = std::numeric_limits<double>::quiet_NaN(); |
|
}; |
|
|
|
|
|
|
|
class upper_bound_function |
|
{ |
|
|
|
public: |
|
|
|
upper_bound_function( |
|
) = default; |
|
|
|
upper_bound_function( |
|
const double relative_noise_magnitude, |
|
const double solver_eps |
|
) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps) |
|
{ |
|
DLIB_CASSERT(relative_noise_magnitude >= 0); |
|
DLIB_CASSERT(solver_eps > 0); |
|
} |
|
|
|
explicit upper_bound_function( |
|
const std::vector<function_evaluation>& _points, |
|
const double relative_noise_magnitude = 0.001, |
|
const double solver_eps = 0.0001 |
|
) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps), points(_points) |
|
{ |
|
DLIB_CASSERT(relative_noise_magnitude >= 0); |
|
DLIB_CASSERT(solver_eps > 0); |
|
|
|
if (points.size() > 1) |
|
{ |
|
DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty."); |
|
|
|
const long dims = points[0].x.size(); |
|
for (auto& p : points) |
|
DLIB_CASSERT(p.x.size() == dims, "All the vectors given to upper_bound_function must have the same dimensionality."); |
|
|
|
learn_params(); |
|
} |
|
|
|
} |
|
|
|
void add ( |
|
const function_evaluation& point |
|
) |
|
{ |
|
DLIB_CASSERT(point.x.size() != 0, "The vectors can't be empty."); |
|
if (points.size() == 0) |
|
{ |
|
points.push_back(point); |
|
return; |
|
} |
|
|
|
DLIB_CASSERT(point.x.size() == dimensionality(), "All the vectors given to upper_bound_function must have the same dimensionality."); |
|
|
|
if (points.size() < 4) |
|
{ |
|
points.push_back(point); |
|
*this = upper_bound_function(points, relative_noise_magnitude, solver_eps); |
|
return; |
|
} |
|
|
|
points.push_back(point); |
|
|
|
for (size_t i = 0; i < points.size()-1; ++i) |
|
active_constraints.push_back(std::make_pair(i,points.size()-1)); |
|
|
|
learn_params(); |
|
} |
|
|
|
long num_points( |
|
) const |
|
{ |
|
return points.size(); |
|
} |
|
|
|
long dimensionality( |
|
) const |
|
{ |
|
if (points.size() == 0) |
|
return 0; |
|
else |
|
return points[0].x.size(); |
|
} |
|
|
|
const std::vector<function_evaluation>& get_points( |
|
) const |
|
{ |
|
return points; |
|
} |
|
|
|
double operator() ( |
|
const matrix<double,0,1>& x |
|
) const |
|
{ |
|
DLIB_CASSERT(num_points() > 0); |
|
DLIB_CASSERT(x.size() == dimensionality()); |
|
|
|
|
|
|
|
double upper_bound = std::numeric_limits<double>::infinity(); |
|
|
|
for (size_t i = 0; i < points.size(); ++i) |
|
{ |
|
const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x))); |
|
upper_bound = std::min(upper_bound, local_bound); |
|
} |
|
|
|
return upper_bound; |
|
} |
|
|
|
private: |
|
|
|
void learn_params ( |
|
) |
|
{ |
|
const long dims = points[0].x.size(); |
|
|
|
using sample_type = std::vector<std::pair<size_t,double>>; |
|
using kernel_type = sparse_linear_kernel<sample_type>; |
|
std::vector<sample_type> x; |
|
std::vector<double> y; |
|
|
|
|
|
|
|
std::vector<running_stats<double>> x_rs(dims); |
|
running_stats<double> y_rs; |
|
for (auto& v : points) |
|
{ |
|
for (long i = 0; i < v.x.size(); ++i) |
|
x_rs[i].add(v.x(i)); |
|
y_rs.add(v.y); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const double yscale = 1.0/y_rs.stddev(); |
|
std::vector<double> xscale(dims); |
|
for (size_t i = 0; i < xscale.size(); ++i) |
|
xscale[i] = 1.0/(x_rs[i].stddev()*yscale); |
|
|
|
sample_type samp; |
|
auto add_constraint = [&](long i, long j) { |
|
samp.clear(); |
|
for (long k = 0; k < dims; ++k) |
|
{ |
|
double temp = (points[i].x(k) - points[j].x(k))*xscale[k]*yscale; |
|
samp.push_back(std::make_pair(k, temp*temp)); |
|
} |
|
|
|
if (points[i].y > points[j].y) |
|
samp.push_back(std::make_pair(dims + j, relative_noise_magnitude)); |
|
else |
|
samp.push_back(std::make_pair(dims + i, relative_noise_magnitude)); |
|
|
|
const double diff = (points[i].y - points[j].y)*yscale; |
|
samp.push_back(std::make_pair(dims + points.size(), 1-diff*diff)); |
|
|
|
x.push_back(samp); |
|
y.push_back(1); |
|
}; |
|
|
|
if (active_constraints.size() == 0) |
|
{ |
|
x.reserve(points.size()*(points.size()-1)/2); |
|
y.reserve(points.size()*(points.size()-1)/2); |
|
for (size_t i = 0; i < points.size(); ++i) |
|
{ |
|
for (size_t j = i+1; j < points.size(); ++j) |
|
{ |
|
add_constraint(i,j); |
|
} |
|
} |
|
} |
|
else |
|
{ |
|
for (auto& p : active_constraints) |
|
add_constraint(p.first, p.second); |
|
} |
|
|
|
|
|
|
|
|
|
svm_c_linear_dcd_trainer<kernel_type> trainer; |
|
trainer.set_c(std::numeric_limits<double>::infinity()); |
|
|
|
trainer.force_last_weight_to_1(true); |
|
trainer.set_epsilon(solver_eps); |
|
|
|
svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state; |
|
auto df = trainer.train(x,y, state); |
|
|
|
|
|
|
|
if (active_constraints.size() == 0) |
|
{ |
|
long k = 0; |
|
for (size_t i = 0; i < points.size(); ++i) |
|
{ |
|
for (size_t j = i+1; j < points.size(); ++j) |
|
{ |
|
if (state.get_alpha()[k++] != 0) |
|
active_constraints.push_back(std::make_pair(i,j)); |
|
} |
|
} |
|
} |
|
else |
|
{ |
|
DLIB_CASSERT(state.get_alpha().size() == active_constraints.size()); |
|
new_active_constraints.clear(); |
|
for (size_t i = 0; i < state.get_alpha().size(); ++i) |
|
{ |
|
if (state.get_alpha()[i] != 0) |
|
new_active_constraints.push_back(active_constraints[i]); |
|
} |
|
active_constraints.swap(new_active_constraints); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
const auto& bv = df.basis_vectors(0); |
|
slopes.set_size(dims); |
|
for (long i = 0; i < dims; ++i) |
|
slopes(i) = bv[i].second*xscale[i]*xscale[i]; |
|
|
|
|
|
|
|
offsets.assign(points.size(),0); |
|
|
|
|
|
for (size_t i = 0; i < points.size(); ++i) |
|
{ |
|
offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude; |
|
} |
|
} |
|
|
|
|
|
|
|
double relative_noise_magnitude = 0.001; |
|
double solver_eps = 0.0001; |
|
std::vector<std::pair<size_t,size_t>> active_constraints, new_active_constraints; |
|
|
|
std::vector<function_evaluation> points; |
|
std::vector<double> offsets; |
|
matrix<double,0,1> slopes; |
|
}; |
|
|
|
|
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|