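// NOTE: the text below is hosting-page residue that was captured together with this
// file; it is not part of the original source.
//   AshanGimhana's picture | Upload folder using huggingface_hub | 9375c9a verified
//   raw | history blame | 18.9 kB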
// Copyright (C) 2009  Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_HESSIAN_PYRAMId_Hh_
#define DLIB_HESSIAN_PYRAMId_Hh_
#include "hessian_pyramid_abstract.h"
#include "../algs.h"
#include "../image_transforms/integral_image.h"
#include "../array.h"
#include "../array2d.h"
#include "../noncopyable.h"
#include "../matrix.h"
#include "../stl_checked.h"
#include <algorithm>
#include <cmath>
#include <vector>
namespace dlib
{
// ----------------------------------------------------------------------------------------
struct interest_point
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            A plain record describing one interest point.  A default constructed
            object has scale, score and laplacian all set to 0.

            Objects of this type order themselves by score alone (see operator<),
            so sorting a container of them arranges the points by response
            strength.
    !*/

    interest_point()
        : scale(0),
          score(0),
          laplacian(0)
    {
    }

    // Location of the point.
    dlib::vector<double,2> center;
    // Scale at which the point was detected.
    double scale;
    // Strength of the detector response at the point.
    double score;
    // Sign of the laplacian at the point.
    double laplacian;

    // Compares scores only; center, scale and laplacian do not take part.
    bool operator < (const interest_point& p) const
    {
        return p.score > score;
    }
};
// ----------------------------------------------------------------------------------------
inline void serialize(
const interest_point& item,
std::ostream& out
)
{
try
{
serialize(item.center,out);
serialize(item.scale,out);
serialize(item.score,out);
serialize(item.laplacian,out);
}
catch (serialization_error& e)
{
throw serialization_error(e.info + "\n while serializing object of type interest_point");
}
}
// ----------------------------------------------------------------------------------------
inline void deserialize(
    interest_point& item,
    std::istream& in
)
/*!
    Reads an interest_point from in, filling the fields in the order
    center, scale, score, laplacian (the order serialize() writes them in).

    A serialization_error raised while reading any field is rethrown with a
    line appended to its message that names interest_point.  The fields are
    read straight into item, so if a later field fails the earlier ones have
    already been overwritten.
!*/
{
    try
    {
        deserialize(item.center, in);
        deserialize(item.scale, in);
        deserialize(item.score, in);
        deserialize(item.laplacian, in);
    }
    catch (const serialization_error& err)
    {
        // Keep the original message and add which object was being read.
        throw serialization_error(err.info + "\n while deserializing object of type interest_point");
    }
}
// ----------------------------------------------------------------------------------------
class hessian_pyramid : noncopyable
{
public:
hessian_pyramid()
{
num_octaves = 0;
num_intervals = 0;
initial_step_size = 0;
}
template <typename integral_image_type>
void build_pyramid (
const integral_image_type& img,
long num_octaves,
long num_intervals,
long initial_step_size
)
{
DLIB_ASSERT(num_octaves > 0 && num_intervals > 0 && initial_step_size > 0,
"\tvoid build_pyramid()"
<< "\n\tAll arguments to this function must be > 0"
<< "\n\t this: " << this
<< "\n\t num_octaves: " << num_octaves
<< "\n\t num_intervals: " << num_intervals
<< "\n\t initial_step_size: " << initial_step_size
);
this->num_octaves = num_octaves;
this->num_intervals = num_intervals;
this->initial_step_size = initial_step_size;
// allocate space for the pyramid
pyramid.resize(num_octaves*num_intervals);
for (long o = 0; o < num_octaves; ++o)
{
const long step_size = get_step_size(o);
for (long i = 0; i < num_intervals; ++i)
{
pyramid[num_intervals*o + i].set_size(img.nr()/step_size, img.nc()/step_size);
}
}
// now fill out the pyramid with data
for (long o = 0; o < num_octaves; ++o)
{
const long step_size = get_step_size(o);
for (long i = 0; i < num_intervals; ++i)
{
const long border_size = get_border_size(i)*step_size;
const long lobe_size = static_cast<long>(std::pow(2.0, o+1.0)+0.5)*(i+1) + 1;
const double area_inv = 1.0/std::pow(3.0*lobe_size, 2.0);
const long lobe_offset = lobe_size/2+1;
const point tl(-lobe_offset,-lobe_offset);
const point tr(lobe_offset,-lobe_offset);
const point bl(-lobe_offset,lobe_offset);
const point br(lobe_offset,lobe_offset);
for (long r = border_size; r < img.nr() - border_size; r += step_size)
{
for (long c = border_size; c < img.nc() - border_size; c += step_size)
{
const point p(c,r);
double Dxx = img.get_sum_of_area(centered_rect(p, lobe_size*3, 2*lobe_size-1)) -
img.get_sum_of_area(centered_rect(p, lobe_size, 2*lobe_size-1))*3.0;
double Dyy = img.get_sum_of_area(centered_rect(p, 2*lobe_size-1, lobe_size*3)) -
img.get_sum_of_area(centered_rect(p, 2*lobe_size-1, lobe_size))*3.0;
double Dxy = img.get_sum_of_area(centered_rect(p+bl, lobe_size, lobe_size)) +
img.get_sum_of_area(centered_rect(p+tr, lobe_size, lobe_size)) -
img.get_sum_of_area(centered_rect(p+tl, lobe_size, lobe_size)) -
img.get_sum_of_area(centered_rect(p+br, lobe_size, lobe_size));
// now we normalize the filter responses
Dxx *= area_inv;
Dyy *= area_inv;
Dxy *= area_inv;
double sign_of_laplacian = +1;
if (Dxx + Dyy < 0)
sign_of_laplacian = -1;
double determinant = Dxx*Dyy - 0.81*Dxy*Dxy;
// If the determinant is negative then just blank it out by setting
// it to zero.
if (determinant < 0)
determinant = 0;
// Save the determinant of the Hessian into our image pyramid. Also
// pack the laplacian sign into the value so we can get it out later.
pyramid[o*num_intervals + i][r/step_size][c/step_size] = sign_of_laplacian*determinant;
}
}
}
}
}
long get_border_size (
long interval
) const
{
DLIB_ASSERT(0 <= interval && interval < intervals(),
"\tlong get_border_size(interval)"
<< "\n\tInvalid interval value"
<< "\n\t this: " << this
<< "\n\t interval: " << interval
);
const double lobe_size = 2.0*(interval+1) + 1;
const double filter_size = 3*lobe_size;
const long bs = static_cast<long>(std::ceil(filter_size/2.0));
return bs;
}
long get_step_size (
long octave
) const
{
DLIB_ASSERT(0 <= octave && octave < octaves(),
"\tlong get_step_size(octave)"
<< "\n\tInvalid octave value"
<< "\n\t this: " << this
<< "\n\t octave: " << octave
);
return initial_step_size*static_cast<long>(std::pow(2.0, (double)octave)+0.5);
}
long nr (
long octave
) const
{
DLIB_ASSERT(0 <= octave && octave < octaves(),
"\tlong nr(octave)"
<< "\n\tInvalid octave value"
<< "\n\t this: " << this
<< "\n\t octave: " << octave
);
return pyramid[num_intervals*octave].nr();
}
long nc (
long octave
) const
{
DLIB_ASSERT(0 <= octave && octave < octaves(),
"\tlong nc(octave)"
<< "\n\tInvalid octave value"
<< "\n\t this: " << this
<< "\n\t octave: " << octave
);
return pyramid[num_intervals*octave].nc();
}
double get_value (
long octave,
long interval,
long r,
long c
) const
{
DLIB_ASSERT(0 <= octave && octave < octaves() &&
0 <= interval && interval < intervals() &&
get_border_size(interval) <= r && r < nr(octave)-get_border_size(interval) &&
get_border_size(interval) <= c && c < nc(octave)-get_border_size(interval),
"\tdouble get_value(octave, interval, r, c)"
<< "\n\tInvalid inputs to this function"
<< "\n\t this: " << this
<< "\n\t octave: " << octave
<< "\n\t interval: " << interval
<< "\n\t octaves: " << octaves()
<< "\n\t intervals: " << intervals()
<< "\n\t r: " << r
<< "\n\t c: " << c
<< "\n\t nr(octave): " << nr(octave)
<< "\n\t nc(octave): " << nc(octave)
<< "\n\t get_border_size(interval): " << get_border_size(interval)
);
return std::abs(pyramid[num_intervals*octave + interval][r][c]);
}
double get_laplacian (
long octave,
long interval,
long r,
long c
) const
{
DLIB_ASSERT(0 <= octave && octave < octaves() &&
0 <= interval && interval < intervals() &&
get_border_size(interval) <= r && r < nr(octave)-get_border_size(interval) &&
get_border_size(interval) <= c && c < nc(octave)-get_border_size(interval),
"\tdouble get_laplacian(octave, interval, r, c)"
<< "\n\tInvalid inputs to this function"
<< "\n\t this: " << this
<< "\n\t octave: " << octave
<< "\n\t interval: " << interval
<< "\n\t octaves: " << octaves()
<< "\n\t intervals: " << intervals()
<< "\n\t r: " << r
<< "\n\t c: " << c
<< "\n\t nr(octave): " << nr(octave)
<< "\n\t nc(octave): " << nc(octave)
<< "\n\t get_border_size(interval): " << get_border_size(interval)
);
// return the sign of the laplacian
if (pyramid[num_intervals*octave + interval][r][c] > 0)
return +1;
else
return -1;
}
long octaves (
) const { return num_octaves; }
long intervals (
) const { return num_intervals; }
private:
long num_octaves;
long num_intervals;
long initial_step_size;
typedef array2d<double> image_type;
typedef array<image_type> pyramid_type;
pyramid_type pyramid;
};
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
namespace hessian_pyramid_helpers
{
inline bool is_maximum_in_region(
    const hessian_pyramid& pyr,
    long o,
    long i,
    long r,
    long c
)
/*!
    Returns true if no cell in the 3x3x3 neighbourhood of (i,r,c) inside
    octave o has a value strictly greater than the value at (i,r,c).  The
    neighbourhood spans intervals i-1..i+1, rows r-1..r+1 and columns
    c-1..c+1.  Ties do not disqualify a point.

    Points in the first or last interval of an octave are never reported as
    maxima because they lack a neighbouring interval on one side.

    The caller has to make sure r-1..r+1 and c-1..c+1 are valid arguments to
    pyr.get_value() for all three intervals.
!*/
{
    // Points on the edge of the octave are not considered reliable.
    const bool has_lower_interval = i > 0;
    const bool has_upper_interval = i+1 < pyr.intervals();
    if (!has_lower_interval || !has_upper_interval)
        return false;

    const double center_value = pyr.get_value(o,i,r,c);

    // Scan the neighbourhood and stop at the first strictly larger value.
    for (long di = -1; di <= 1; ++di)
    {
        for (long dr = -1; dr <= 1; ++dr)
        {
            for (long dc = -1; dc <= 1; ++dc)
            {
                if (pyr.get_value(o, i+di, r+dr, c+dc) > center_value)
                    return false;
            }
        }
    }

    return true;
}
// ------------------------------------------------------------------------------------
inline const matrix<double,3,1> get_hessian_gradient (
    const hessian_pyramid& pyr,
    long o,
    long i,
    long r,
    long c
)
/*!
    Returns the central-difference gradient of the pyramid values at
    (i,r,c) in octave o.  Element 0 is the derivative along the columns,
    element 1 along the rows and element 2 along the intervals.
!*/
{
    // The two neighbours on either side of the point, along each axis.
    const double col_next = pyr.get_value(o,i,r,c+1);
    const double col_prev = pyr.get_value(o,i,r,c-1);
    const double row_next = pyr.get_value(o,i,r+1,c);
    const double row_prev = pyr.get_value(o,i,r-1,c);
    const double int_next = pyr.get_value(o,i+1,r,c);
    const double int_prev = pyr.get_value(o,i-1,r,c);

    matrix<double,3,1> grad;
    grad(0) = (col_next - col_prev)/2.0;
    grad(1) = (row_next - row_prev)/2.0;
    grad(2) = (int_next - int_prev)/2.0;
    return grad;
}
// ------------------------------------------------------------------------------------
inline const matrix<double,3,3> get_hessian_hessian (
    const hessian_pyramid& pyr,
    long o,
    long i,
    long r,
    long c
)
/*!
    Returns the symmetric 3x3 matrix of second finite differences of the
    pyramid values at (i,r,c) in octave o.  The axes are ordered
    x (columns), y (rows), s (intervals), matching get_hessian_gradient().
!*/
{
    const double center = pyr.get_value(o,i,r,c);

    // Pure second differences: next + previous - 2*center along one axis.
    const double Dxx = (pyr.get_value(o,i,r,c+1) + pyr.get_value(o,i,r,c-1)) - 2*center;
    const double Dyy = (pyr.get_value(o,i,r+1,c) + pyr.get_value(o,i,r-1,c)) - 2*center;
    const double Dss = (pyr.get_value(o,i+1,r,c) + pyr.get_value(o,i-1,r,c)) - 2*center;

    // Mixed differences: the two "same sign" corners minus the two
    // "opposite sign" corners, divided by 4.
    const double xy_pp = pyr.get_value(o,i,r+1,c+1);
    const double xy_mm = pyr.get_value(o,i,r-1,c-1);
    const double xy_mp = pyr.get_value(o,i,r-1,c+1);
    const double xy_pm = pyr.get_value(o,i,r+1,c-1);
    const double Dxy = (xy_pp + xy_mm - xy_mp - xy_pm) / 4.0;

    const double xs_pp = pyr.get_value(o,i+1,r,c+1);
    const double xs_mm = pyr.get_value(o,i-1,r,c-1);
    const double xs_mp = pyr.get_value(o,i-1,r,c+1);
    const double xs_pm = pyr.get_value(o,i+1,r,c-1);
    const double Dxs = (xs_pp + xs_mm - xs_mp - xs_pm) / 4.0;

    const double ys_pp = pyr.get_value(o,i+1,r+1,c);
    const double ys_mm = pyr.get_value(o,i-1,r-1,c);
    const double ys_mp = pyr.get_value(o,i-1,r+1,c);
    const double ys_pm = pyr.get_value(o,i+1,r-1,c);
    const double Dys = (ys_pp + ys_mm - ys_mp - ys_pm) / 4.0;

    matrix<double,3,3> hess;
    hess(0,0) = Dxx;  hess(0,1) = Dxy;  hess(0,2) = Dxs;
    hess(1,0) = Dxy;  hess(1,1) = Dyy;  hess(1,2) = Dys;
    hess(2,0) = Dxs;  hess(2,1) = Dys;  hess(2,2) = Dss;
    return hess;
}
// ------------------------------------------------------------------------------------
inline const interest_point interpolate_point (
    const hessian_pyramid& pyr,
    long o,
    long i,
    long r,
    long c
)
/*!
    Refines the location of the candidate at (i,r,c) in octave o by taking
    one Newton step, offset = -inv(hessian)*gradient, in (column, row,
    interval) space.

    If every component of the offset is smaller than 0.5 in magnitude the
    returned point has
        - center:    (c,r) plus the offset, multiplied by pyr.get_step_size(o)
        - scale:     1.2/9.0 * 3 * (2^(o+1)*(i + offset.z() + 1) + 1)
        - score:     pyr.get_value(o,i,r,c)
        - laplacian: pyr.get_laplacian(o,i,r,c)
    Otherwise a default constructed point with score == -1 is returned, which
    tells the caller that no interest point was found.
!*/
{
    interest_point result;

    dlib::vector<double,3> grid_position(c,r,i);
    dlib::vector<double,3> offset = -inv(get_hessian_hessian(pyr,o,i,r,c))*get_hessian_gradient(pyr,o,i,r,c);

    // Reject the candidate unless the refined position stays within half a
    // cell of the grid point in every dimension.
    if (!(max(abs(offset)) < 0.5))
    {
        // this indicates to the caller that no interest point was found.
        result.score = -1;
        return result;
    }

    // Assigning the 3D position to a 2D vector keeps the column/row part;
    // multiplying by the step size converts pyramid cells to image pixels.
    dlib::vector<double,2> image_position;
    image_position = (grid_position+offset)*pyr.get_step_size(o);

    const double lobe_size = std::pow(2.0, o+1.0)*(i+offset.z()+1) + 1;
    const double filter_size = 3*lobe_size;

    result.center = image_position;
    result.scale = 1.2/9.0 * filter_size;
    result.score = pyr.get_value(o,i,r,c);
    result.laplacian = pyr.get_laplacian(o,i,r,c);
    return result;
}
}
// ----------------------------------------------------------------------------------------
template <typename Alloc>
void get_interest_points (
const hessian_pyramid& pyr,
double threshold,
std::vector<interest_point,Alloc>& result_points
)
{
DLIB_ASSERT(threshold >= 0,
"\tvoid get_interest_points()"
<< "\n\t Invalid arguments to this function"
<< "\n\t threshold: " << threshold
);
using namespace std;
using namespace hessian_pyramid_helpers;
result_points.clear();
for (long o = 0; o < pyr.octaves(); ++o)
{
const long nr = pyr.nr(o);
const long nc = pyr.nc(o);
// do non-maximum suppression on all the intervals in the current octave and
// accumulate the results in result_points
for (long i = 1; i < pyr.intervals()-1; i += 1)
{
const long border_size = pyr.get_border_size(i+1);
for (long r = border_size+1; r < nr - border_size-1; r += 1)
{
for (long c = border_size+1; c < nc - border_size-1; c += 1)
{
double max_val = pyr.get_value(o,i,r,c);
long max_i = i;
long max_r = r;
long max_c = c;
// If the max point we found is really a maximum in its own region and
// is big enough then add it to the results.
if (max_val >= threshold && is_maximum_in_region(pyr, o, max_i, max_r, max_c))
{
//cout << max_val << endl;
interest_point sp = interpolate_point (pyr, o, max_i, max_r, max_c);
if (sp.score >= threshold)
{
result_points.push_back(sp);
}
}
}
}
}
}
}
// ----------------------------------------------------------------------------------------
template <typename Alloc>
void get_interest_points (
    const hessian_pyramid& pyr,
    double threshold,
    std_vector_c<interest_point,Alloc>& result_points
)
/*!
    Convenience overload for std_vector_c containers.  It forwards to the
    std::vector version of get_interest_points().  The conversion to
    std::vector normally happens on its own, but template argument deduction
    for Alloc prevents that here, so the reference is converted by hand.
!*/
{
    std::vector<interest_point,Alloc>& plain_vector = result_points;
    get_interest_points(pyr, threshold, plain_vector);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_HESSIAN_PYRAMId_Hh_