// Copyright (C) 2011  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.

// NOTE: the guard macro below is undefined and then immediately tested with
// #ifdef, so the preprocessor always skips everything up to the matching
// #endif.  Nothing in this file is ever compiled; the declarations and their
// /*! ... !*/ contracts are here to be read.
#undef DLIB_SCAN_iMAGE_ABSTRACT_Hh_
#ifdef DLIB_SCAN_iMAGE_ABSTRACT_Hh_

#include <vector>
#include <utility>
#include "../algs.h"
#include "../image_processing/generic_image.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename image_array_type
        >
    bool all_images_same_size (
        const image_array_type& images
    );
    /*!
        requires
            - image_array_type == an implementation of array/array_kernel_abstract.h
            - image_array_type::type == an image object that implements the interface
              defined in dlib/image_processing/generic_image.h
        ensures
            - if (all elements of images have the same dimensions (i.e.
              for all i and j: get_rect(images[i]) == get_rect(images[j]))) then
                - returns true
            - else
                - returns false
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename image_array_type
        >
    double sum_of_rects_in_images (
        const image_array_type& images,
        const std::vector<std::pair<unsigned int, rectangle> >& rects,
        const point& position
    );
    /*!
        requires
            - image_array_type == an implementation of array/array_kernel_abstract.h
            - image_array_type::type == an image object that implements the interface
              defined in dlib/image_processing/generic_image.h.  Moreover, these objects must
              contain a scalar pixel type (e.g. int rather than rgb_pixel)
            - all_images_same_size(images) == true
            - for all valid i: rects[i].first < images.size()
              (i.e. all the rectangles must reference valid elements of images)
        ensures
            - returns the sum of the pixels inside the given rectangles.  To be precise,
              let RECT_SUM[i] = sum of pixels inside the rectangle translate_rect(rects[i].second, position)
              from the image images[rects[i].first].  Then this function returns the
              sum of RECT_SUM[i] for all the valid values of i.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename image_array_type
        >
    double sum_of_rects_in_images_movable_parts (
        const image_array_type& images,
        const rectangle& window,
        const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
        const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
        const point& position
    );
    /*!
        requires
            - image_array_type == an implementation of array/array_kernel_abstract.h
            - image_array_type::type == an image object that implements the interface
              defined in dlib/image_processing/generic_image.h.  Moreover, these objects must
              contain a scalar pixel type (e.g. int rather than rgb_pixel)
            - all_images_same_size(images) == true
            - center(window) == point(0,0)
            - for all valid i:
                - fixed_rects[i].first < images.size()
                  (i.e. all the rectangles must reference valid elements of images)
            - for all valid i:
                - movable_rects[i].first < images.size()
                  (i.e. all the rectangles must reference valid elements of images)
                - center(movable_rects[i].second) == point(0,0)
        ensures
            - returns the sum of the pixels inside fixed_rects as well as the sum of the pixels
              inside movable_rects when these latter rectangles are placed at their highest
              scoring locations inside the given window.  To be precise:
                - let RECT_SUM(r,x) = sum of pixels inside the rectangle translate_rect(r.second, x)
                  from the image images[r.first].
                - let WIN_MAX(i) = The maximum value of RECT_SUM(movable_rects[i],X) when maximizing
                  over all the X such that translate_rect(window,position).contains(X) == true.
                - let TOTAL_FIXED == sum over all elements R in fixed_rects of: RECT_SUM(R,position)
                - let TOTAL_MOVABLE == sum over all valid i of: max(WIN_MAX(i), 0)
              Then this function returns TOTAL_FIXED + TOTAL_MOVABLE.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename image_type
        >
    void find_points_above_thresh (
        std::vector<std::pair<double, point> >& dets,
        const image_type& img,
        const double thresh,
        const unsigned long max_dets
    );
    /*!
        requires
            - image_type == an image object that implements the interface defined in
              dlib/image_processing/generic_image.h.  Moreover, it must contain a
              scalar pixel type (e.g. int rather than rgb_pixel)
        ensures
            - #dets == a list of points from img which had pixel values >= thresh.
            - Specifically, we have:
                - #dets.size() <= max_dets
                  (note that dets is cleared before new detections are added by find_points_above_thresh())
                - for all valid i:
                    - #dets[i].first == img[#dets[i].second.y()][#dets[i].second.x()]
                      (i.e. the first field contains the value of the pixel at this detection location)
                    - #dets[i].first >= thresh
            - if (there are more than max_dets locations that pass the above threshold test) then
                - #dets == a random subsample of all the locations which passed the threshold
                  test.
            - else
                - #dets == all the points which passed the threshold test.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename image_type
        >
    std::vector<point> find_peaks (
        const image_type& img,
        const double non_max_suppression_radius,
        const typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type& thresh
    );
    /*!
        requires
            - image_type == an image object that implements the interface defined in
              dlib/image_processing/generic_image.h.  Moreover, it must contain a
              scalar pixel type (e.g. int rather than rgb_pixel)
            - non_max_suppression_radius >= 0
        ensures
            - Scans the given image and finds all pixels with values >= thresh that are
              also local maxima within their 8-connected neighborhood of the image.  Such
              pixels are collected, sorted in decreasing order of their pixel values, and
              then non-maximum suppression is applied to this list of points using the
              given non_max_suppression_radius.  The final list of peaks is then returned.

              Therefore, the returned list, V, will have these properties:
                - V.size() == the number of peaks found in the image.
                - When measured in image coordinates, no elements of V are within
                  non_max_suppression_radius distance of each other.  That is, for all valid i!=j
                  it is true that length(V[i]-V[j]) > non_max_suppression_radius.
                - For each element of V, that element has the maximum pixel value of all
                  pixels in the ball centered on that pixel with radius
                  non_max_suppression_radius.
    !*/

    template <
        typename image_type
        >
    std::vector<point> find_peaks (
        const image_type& img
    );
    /*!
        ensures
            - performs: return find_peaks(img, 0, partition_pixels(img))
    !*/

    template <
        typename image_type
        >
    std::vector<point> find_peaks (
        const image_type& img,
        const double non_max_suppression_radius
    );
    /*!
        ensures
            - performs: return find_peaks(img, non_max_suppression_radius, partition_pixels(img))
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename image_array_type
        >
    void scan_image (
        std::vector<std::pair<double, point> >& dets,
        const image_array_type& images,
        const std::vector<std::pair<unsigned int, rectangle> >& rects,
        const double thresh,
        const unsigned long max_dets
    );
    /*!
        requires
            - image_array_type == an implementation of array/array_kernel_abstract.h
            - image_array_type::type == an image object that implements the interface
              defined in dlib/image_processing/generic_image.h.  Moreover, these objects must
              contain a scalar pixel type (e.g. int rather than rgb_pixel)
            - images.size() > 0
            - rects.size() > 0
            - all_images_same_size(images) == true
            - for all valid i: rects[i].first < images.size()
              (i.e. all the rectangles must reference valid elements of images)
        ensures
            - slides the set of rectangles over the image space and reports the locations
              which give a sum >= thresh.
            - Specifically, we have:
                - #dets.size() <= max_dets
                  (note that dets is cleared before new detections are added by scan_image())
                - for all valid i:
                    - #dets[i].first == sum_of_rects_in_images(images,rects,#dets[i].second) >= thresh
            - if (there are more than max_dets locations that pass the threshold test) then
                - #dets == a random subsample of all the locations which passed the threshold
                  test.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename image_array_type
        >
    void scan_image_movable_parts (
        std::vector<std::pair<double, point> >& dets,
        const image_array_type& images,
        const rectangle& window,
        const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
        const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
        const double thresh,
        const unsigned long max_dets
    );
    /*!
        requires
            - image_array_type == an implementation of array/array_kernel_abstract.h
            - image_array_type::type == an image object that implements the interface
              defined in dlib/image_processing/generic_image.h.  Moreover, these objects must
              contain a scalar pixel type (e.g. int rather than rgb_pixel)
            - images.size() > 0
            - all_images_same_size(images) == true
            - center(window) == point(0,0)
            - window.area() > 0
            - for all valid i:
                - fixed_rects[i].first < images.size()
                  (i.e. all the rectangles must reference valid elements of images)
            - for all valid i:
                - movable_rects[i].first < images.size()
                  (i.e. all the rectangles must reference valid elements of images)
                - center(movable_rects[i].second) == point(0,0)
                - movable_rects[i].second.area() > 0
        ensures
            - Scans the given window over the images and reports the locations with a score
              >= thresh.
            - Specifically, we have:
                - #dets.size() <= max_dets
                  (note that dets is cleared before new detections are added by scan_image_movable_parts())
                - for all valid i:
                    - #dets[i].first == sum_of_rects_in_images_movable_parts(images,
                                                                             window,
                                                                             fixed_rects,
                                                                             movable_rects,
                                                                             #dets[i].second) >= thresh
            - if (there are more than max_dets locations that pass the above threshold test) then
                - #dets == a random subsample of all the locations which passed the threshold
                  test.
    !*/

// ----------------------------------------------------------------------------------------

} // namespace dlib

#endif // DLIB_SCAN_iMAGE_ABSTRACT_Hh_