// Copyright (C) 2009 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_HESSIAN_PYRAMId_Hh_ #define DLIB_HESSIAN_PYRAMId_Hh_ #include "hessian_pyramid_abstract.h" #include "../algs.h" #include "../image_transforms/integral_image.h" #include "../array.h" #include "../array2d.h" #include "../noncopyable.h" #include "../matrix.h" #include "../stl_checked.h" #include #include namespace dlib { // ---------------------------------------------------------------------------------------- struct interest_point { interest_point() : scale(0), score(0), laplacian(0) {} dlib::vector center; double scale; double score; double laplacian; bool operator < (const interest_point& p) const { return score < p.score; } }; // ---------------------------------------------------------------------------------------- inline void serialize( const interest_point& item, std::ostream& out ) { try { serialize(item.center,out); serialize(item.scale,out); serialize(item.score,out); serialize(item.laplacian,out); } catch (serialization_error& e) { throw serialization_error(e.info + "\n while serializing object of type interest_point"); } } // ---------------------------------------------------------------------------------------- inline void deserialize( interest_point& item, std::istream& in ) { try { deserialize(item.center,in); deserialize(item.scale,in); deserialize(item.score,in); deserialize(item.laplacian,in); } catch (serialization_error& e) { throw serialization_error(e.info + "\n while deserializing object of type interest_point"); } } // ---------------------------------------------------------------------------------------- class hessian_pyramid : noncopyable { public: hessian_pyramid() { num_octaves = 0; num_intervals = 0; initial_step_size = 0; } template void build_pyramid ( const integral_image_type& img, long num_octaves, long num_intervals, long initial_step_size ) { DLIB_ASSERT(num_octaves > 0 && num_intervals > 0 && initial_step_size > 0, "\tvoid build_pyramid()" << "\n\tAll arguments to this function must be > 0" << "\n\t this: " << this << "\n\t num_octaves: " << num_octaves << "\n\t num_intervals: " << num_intervals << "\n\t initial_step_size: " << initial_step_size ); this->num_octaves = num_octaves; this->num_intervals = num_intervals; this->initial_step_size = initial_step_size; // allocate space for the pyramid pyramid.resize(num_octaves*num_intervals); for (long o = 0; o < num_octaves; ++o) { const long step_size = get_step_size(o); for (long i = 0; i < num_intervals; ++i) { pyramid[num_intervals*o + i].set_size(img.nr()/step_size, img.nc()/step_size); } } // now fill out the pyramid with data for (long o = 0; o < num_octaves; ++o) { const long step_size = get_step_size(o); for (long i = 0; i < num_intervals; ++i) { const long border_size = get_border_size(i)*step_size; const long lobe_size = static_cast(std::pow(2.0, o+1.0)+0.5)*(i+1) + 1; const double area_inv = 1.0/std::pow(3.0*lobe_size, 2.0); const long lobe_offset = lobe_size/2+1; const point tl(-lobe_offset,-lobe_offset); const point tr(lobe_offset,-lobe_offset); const point bl(-lobe_offset,lobe_offset); const point br(lobe_offset,lobe_offset); for (long r = border_size; r < img.nr() - border_size; r += step_size) { for (long c = border_size; c < img.nc() - border_size; c += step_size) { const point p(c,r); double Dxx = img.get_sum_of_area(centered_rect(p, lobe_size*3, 2*lobe_size-1)) - img.get_sum_of_area(centered_rect(p, lobe_size, 2*lobe_size-1))*3.0; double Dyy = img.get_sum_of_area(centered_rect(p, 2*lobe_size-1, lobe_size*3)) - img.get_sum_of_area(centered_rect(p, 2*lobe_size-1, lobe_size))*3.0; double Dxy = img.get_sum_of_area(centered_rect(p+bl, lobe_size, lobe_size)) + img.get_sum_of_area(centered_rect(p+tr, lobe_size, lobe_size)) - img.get_sum_of_area(centered_rect(p+tl, lobe_size, lobe_size)) - img.get_sum_of_area(centered_rect(p+br, lobe_size, lobe_size)); // now we normalize the filter responses Dxx *= area_inv; Dyy *= area_inv; Dxy *= area_inv; double sign_of_laplacian = +1; if (Dxx + Dyy < 0) sign_of_laplacian = -1; double determinant = Dxx*Dyy - 0.81*Dxy*Dxy; // If the determinant is negative then just blank it out by setting // it to zero. if (determinant < 0) determinant = 0; // Save the determinant of the Hessian into our image pyramid. Also // pack the laplacian sign into the value so we can get it out later. pyramid[o*num_intervals + i][r/step_size][c/step_size] = sign_of_laplacian*determinant; } } } } } long get_border_size ( long interval ) const { DLIB_ASSERT(0 <= interval && interval < intervals(), "\tlong get_border_size(interval)" << "\n\tInvalid interval value" << "\n\t this: " << this << "\n\t interval: " << interval ); const double lobe_size = 2.0*(interval+1) + 1; const double filter_size = 3*lobe_size; const long bs = static_cast(std::ceil(filter_size/2.0)); return bs; } long get_step_size ( long octave ) const { DLIB_ASSERT(0 <= octave && octave < octaves(), "\tlong get_step_size(octave)" << "\n\tInvalid octave value" << "\n\t this: " << this << "\n\t octave: " << octave ); return initial_step_size*static_cast(std::pow(2.0, (double)octave)+0.5); } long nr ( long octave ) const { DLIB_ASSERT(0 <= octave && octave < octaves(), "\tlong nr(octave)" << "\n\tInvalid octave value" << "\n\t this: " << this << "\n\t octave: " << octave ); return pyramid[num_intervals*octave].nr(); } long nc ( long octave ) const { DLIB_ASSERT(0 <= octave && octave < octaves(), "\tlong nc(octave)" << "\n\tInvalid octave value" << "\n\t this: " << this << "\n\t octave: " << octave ); return pyramid[num_intervals*octave].nc(); } double get_value ( long octave, long interval, long r, long c ) const { DLIB_ASSERT(0 <= octave && octave < octaves() && 0 <= interval && interval < intervals() && get_border_size(interval) <= r && r < nr(octave)-get_border_size(interval) && get_border_size(interval) <= c && c < nc(octave)-get_border_size(interval), "\tdouble get_value(octave, interval, r, c)" << "\n\tInvalid inputs to this function" << "\n\t this: " << this << "\n\t octave: " << octave << "\n\t interval: " << interval << "\n\t octaves: " << octaves() << "\n\t intervals: " << intervals() << "\n\t r: " << r << "\n\t c: " << c << "\n\t nr(octave): " << nr(octave) << "\n\t nc(octave): " << nc(octave) << "\n\t get_border_size(interval): " << get_border_size(interval) ); return std::abs(pyramid[num_intervals*octave + interval][r][c]); } double get_laplacian ( long octave, long interval, long r, long c ) const { DLIB_ASSERT(0 <= octave && octave < octaves() && 0 <= interval && interval < intervals() && get_border_size(interval) <= r && r < nr(octave)-get_border_size(interval) && get_border_size(interval) <= c && c < nc(octave)-get_border_size(interval), "\tdouble get_laplacian(octave, interval, r, c)" << "\n\tInvalid inputs to this function" << "\n\t this: " << this << "\n\t octave: " << octave << "\n\t interval: " << interval << "\n\t octaves: " << octaves() << "\n\t intervals: " << intervals() << "\n\t r: " << r << "\n\t c: " << c << "\n\t nr(octave): " << nr(octave) << "\n\t nc(octave): " << nc(octave) << "\n\t get_border_size(interval): " << get_border_size(interval) ); // return the sign of the laplacian if (pyramid[num_intervals*octave + interval][r][c] > 0) return +1; else return -1; } long octaves ( ) const { return num_octaves; } long intervals ( ) const { return num_intervals; } private: long num_octaves; long num_intervals; long initial_step_size; typedef array2d image_type; typedef array pyramid_type; pyramid_type pyramid; }; // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- namespace hessian_pyramid_helpers { inline bool is_maximum_in_region( const hessian_pyramid& pyr, long o, long i, long r, long c ) { // First check if this point is near the edge of the octave // If it is then we say it isn't a maximum as these points are // not as reliable. if (i <= 0 || i+1 >= pyr.intervals()) { return false; } const double val = pyr.get_value(o,i,r,c); // now check if there are any bigger values around this guy for (long ii = i-1; ii <= i+1; ++ii) { for (long rr = r-1; rr <= r+1; ++rr) { for (long cc = c-1; cc <= c+1; ++cc) { if (pyr.get_value(o,ii,rr,cc) > val) return false; } } } return true; } // ------------------------------------------------------------------------------------ inline const matrix get_hessian_gradient ( const hessian_pyramid& pyr, long o, long i, long r, long c ) { matrix grad; grad(0) = (pyr.get_value(o,i,r,c+1) - pyr.get_value(o,i,r,c-1))/2.0; grad(1) = (pyr.get_value(o,i,r+1,c) - pyr.get_value(o,i,r-1,c))/2.0; grad(2) = (pyr.get_value(o,i+1,r,c) - pyr.get_value(o,i-1,r,c))/2.0; return grad; } // ------------------------------------------------------------------------------------ inline const matrix get_hessian_hessian ( const hessian_pyramid& pyr, long o, long i, long r, long c ) { matrix hess; const double val = pyr.get_value(o,i,r,c); double Dxx = (pyr.get_value(o,i,r,c+1) + pyr.get_value(o,i,r,c-1)) - 2*val; double Dyy = (pyr.get_value(o,i,r+1,c) + pyr.get_value(o,i,r-1,c)) - 2*val; double Dss = (pyr.get_value(o,i+1,r,c) + pyr.get_value(o,i-1,r,c)) - 2*val; double Dxy = (pyr.get_value(o,i,r+1,c+1) + pyr.get_value(o,i,r-1,c-1) - pyr.get_value(o,i,r-1,c+1) - pyr.get_value(o,i,r+1,c-1)) / 4.0; double Dxs = (pyr.get_value(o,i+1,r,c+1) + pyr.get_value(o,i-1,r,c-1) - pyr.get_value(o,i-1,r,c+1) - pyr.get_value(o,i+1,r,c-1)) / 4.0; double Dys = (pyr.get_value(o,i+1,r+1,c) + pyr.get_value(o,i-1,r-1,c) - pyr.get_value(o,i-1,r+1,c) - pyr.get_value(o,i+1,r-1,c)) / 4.0; hess = Dxx, Dxy, Dxs, Dxy, Dyy, Dys, Dxs, Dys, Dss; return hess; } // ------------------------------------------------------------------------------------ inline const interest_point interpolate_point ( const hessian_pyramid& pyr, long o, long i, long r, long c ) { dlib::vector p(c,r); dlib::vector start_point(c,r,i); dlib::vector interpolated_point = -inv(get_hessian_hessian(pyr,o,i,r,c))*get_hessian_gradient(pyr,o,i,r,c); //cout << "inter: " << trans(interpolated_point); interest_point temp; if (max(abs(interpolated_point)) < 0.5) { p = (start_point+interpolated_point)*pyr.get_step_size(o); const double lobe_size = std::pow(2.0, o+1.0)*(i+interpolated_point.z()+1) + 1; const double filter_size = 3*lobe_size; const double scale = 1.2/9.0 * filter_size; temp.center = p; temp.scale = scale; temp.score = pyr.get_value(o,i,r,c); temp.laplacian = pyr.get_laplacian(o,i,r,c); } else { // this indicates to the caller that no interest point was found. temp.score = -1; } return temp; } } // ---------------------------------------------------------------------------------------- template void get_interest_points ( const hessian_pyramid& pyr, double threshold, std::vector& result_points ) { DLIB_ASSERT(threshold >= 0, "\tvoid get_interest_points()" << "\n\t Invalid arguments to this function" << "\n\t threshold: " << threshold ); using namespace std; using namespace hessian_pyramid_helpers; result_points.clear(); for (long o = 0; o < pyr.octaves(); ++o) { const long nr = pyr.nr(o); const long nc = pyr.nc(o); // do non-maximum suppression on all the intervals in the current octave and // accumulate the results in result_points for (long i = 1; i < pyr.intervals()-1; i += 1) { const long border_size = pyr.get_border_size(i+1); for (long r = border_size+1; r < nr - border_size-1; r += 1) { for (long c = border_size+1; c < nc - border_size-1; c += 1) { double max_val = pyr.get_value(o,i,r,c); long max_i = i; long max_r = r; long max_c = c; // If the max point we found is really a maximum in its own region and // is big enough then add it to the results. if (max_val >= threshold && is_maximum_in_region(pyr, o, max_i, max_r, max_c)) { //cout << max_val << endl; interest_point sp = interpolate_point (pyr, o, max_i, max_r, max_c); if (sp.score >= threshold) { result_points.push_back(sp); } } } } } } } // ---------------------------------------------------------------------------------------- template void get_interest_points ( const hessian_pyramid& pyr, double threshold, std_vector_c& result_points ) /*! This function is just an overload that automatically casts std_vector_c objects into std::vector objects. (Usually this is automatic but the template argument there messes up the conversion so we have to do it explicitly) !*/ { std::vector& v = result_points; get_interest_points(pyr, threshold, v); } // ---------------------------------------------------------------------------------------- } #endif // DLIB_HESSIAN_PYRAMId_Hh_