// Copyright (C) 2012 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_INTERPOlATIONh_ #define DLIB_INTERPOlATIONh_ #include "../threads.h" #include <algorithm> #include "interpolation_abstract.h" #include "../pixel.h" #include "../matrix.h" #include "assign_image.h" #include "image_pyramid.h" #include "../simd.h" #include "../image_processing/full_object_detection.h" #include <limits> #include <array> #include "../rand.h" namespace dlib { // ---------------------------------------------------------------------------------------- template <typename T> struct sub_image_proxy { sub_image_proxy() = default; sub_image_proxy ( T& img, rectangle rect ) { rect = rect.intersect(get_rect(img)); typedef typename image_traits<T>::pixel_type pixel_type; _nr = rect.height(); _nc = rect.width(); _width_step = width_step(img); _data = (char*)image_data(img) + sizeof(pixel_type)*rect.left() + rect.top()*_width_step; } void* _data = 0; long _width_step = 0; long _nr = 0; long _nc = 0; }; template <typename T> struct const_sub_image_proxy { const_sub_image_proxy() = default; const_sub_image_proxy ( const T& img, rectangle rect ) { rect = rect.intersect(get_rect(img)); typedef typename image_traits<T>::pixel_type pixel_type; _nr = rect.height(); _nc = rect.width(); _width_step = width_step(img); _data = (const char*)image_data(img) + sizeof(pixel_type)*rect.left() + rect.top()*_width_step; } const void* _data = 0; long _width_step = 0; long _nr = 0; long _nc = 0; }; template <typename T> struct image_traits<sub_image_proxy<T> > { typedef typename image_traits<T>::pixel_type pixel_type; }; template <typename T> struct image_traits<const sub_image_proxy<T> > { typedef typename image_traits<T>::pixel_type pixel_type; }; template <typename T> struct image_traits<const_sub_image_proxy<T> > { typedef typename image_traits<T>::pixel_type pixel_type; }; template <typename T> struct image_traits<const const_sub_image_proxy<T> > { typedef typename image_traits<T>::pixel_type pixel_type; }; template <typename T> inline long num_rows( const sub_image_proxy<T>& img) { return img._nr; } template <typename T> inline long num_columns( const sub_image_proxy<T>& img) { return img._nc; } template <typename T> inline long num_rows( const const_sub_image_proxy<T>& img) { return img._nr; } template <typename T> inline long num_columns( const const_sub_image_proxy<T>& img) { return img._nc; } template <typename T> inline void* image_data( sub_image_proxy<T>& img) { return img._data; } template <typename T> inline const void* image_data( const sub_image_proxy<T>& img) { return img._data; } template <typename T> inline const void* image_data( const const_sub_image_proxy<T>& img) { return img._data; } template <typename T> inline long width_step( const sub_image_proxy<T>& img ) { return img._width_step; } template <typename T> inline long width_step( const const_sub_image_proxy<T>& img ) { return img._width_step; } template <typename T> void set_image_size(sub_image_proxy<T>& img, long rows, long cols) { DLIB_CASSERT(img._nr == rows && img._nc == cols, "A sub_image can't be resized." << "\n\t img._nr: "<< img._nr << "\n\t img._nc: "<< img._nc << "\n\t rows: "<< rows << "\n\t cols: "<< cols ); } template < typename image_type > sub_image_proxy<image_type> sub_image ( image_type& img, const rectangle& rect ) { return sub_image_proxy<image_type>(img,rect); } template < typename image_type > const const_sub_image_proxy<image_type> sub_image ( const image_type& img, const rectangle& rect ) { return const_sub_image_proxy<image_type>(img,rect); } template <typename T> inline sub_image_proxy<matrix<T>> sub_image ( T* img, long nr, long nc, long row_stride ) { sub_image_proxy<matrix<T>> tmp; tmp._data = img; tmp._nr = nr; tmp._nc = nc; tmp._width_step = row_stride*sizeof(T); return tmp; } template <typename T> inline const const_sub_image_proxy<matrix<T>> sub_image ( const T* img, long nr, long nc, long row_stride ) { const_sub_image_proxy<matrix<T>> tmp; tmp._data = img; tmp._nr = nr; tmp._nc = nc; tmp._width_step = row_stride*sizeof(T); return tmp; } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- class interpolate_nearest_neighbor { public: template <typename image_view_type, typename pixel_type> bool operator() ( const image_view_type& img, const dlib::point& p, pixel_type& result ) const { COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false); if (get_rect(img).contains(p)) { assign_pixel(result, img[p.y()][p.x()]); return true; } else { return false; } } }; // ---------------------------------------------------------------------------------------- class interpolate_bilinear { public: template <typename T, typename image_view_type, typename pixel_type> bool operator() ( const image_view_type& img, const dlib::vector<T,2>& p, pixel_type& result ) const { // Assign pixel gives special meaning to alpha channel that would break interpolation static_assert(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false, "Images with alpha channel not supported"); // Interpolation currently supports only fully cartesian (non-polar) spaces. static_assert(is_color_space_cartesian_image<image_view_type>::value == true, "Non-cartesian color space used in interpolation"); const long left = static_cast<long>(std::floor(p.x())); const long top = static_cast<long>(std::floor(p.y())); const long right = left+1; const long bottom = top+1; // if the interpolation goes outside img if (!(left >= 0 && top >= 0 && right < img.nc() && bottom < img.nr())) return false; const double lr_frac = p.x() - left; const double tb_frac = p.y() - top; const auto tl = pixel_to_vector<double>(img[top][left]); const auto tr = pixel_to_vector<double>(img[top][right]); const auto bl = pixel_to_vector<double>(img[bottom][left]); const auto br = pixel_to_vector<double>(img[bottom][right]); typename image_view_type::pixel_type temp; vector_to_pixel(temp, (1 - tb_frac) * ((1 - lr_frac) * tl + lr_frac * tr) + tb_frac * ((1 - lr_frac) * bl + lr_frac * br)); assign_pixel(result, temp); return true; } }; // ---------------------------------------------------------------------------------------- class interpolate_quadratic { public: template <typename T, typename image_view_type, typename pixel_type> bool operator() ( const image_view_type& img, const dlib::vector<T,2>& p, pixel_type& result ) const { // Assign pixel gives special meaning to alpha channel that would break interpolation static_assert(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false, "Images with alpha channel not supported"); // Interpolation currently supports only fully cartesian (non-polar) spaces. static_assert(is_color_space_cartesian_image<image_view_type>::value == true, "Non-cartesian color space used in interpolation"); using traits = pixel_traits<typename image_view_type::pixel_type>; const point pp(p); // if the interpolation goes outside img if (!get_rect(img).contains(grow_rect(pp,1))) return false; const long r = pp.y(); const long c = pp.x(); matrix<double, traits::num, 1> pvout; for (long i = 0; i < traits::num; ++i) pvout(i) = interpolate(p-pp, pixel_to_vector<double>(img[r-1][c-1])(i), pixel_to_vector<double>(img[r-1][c ])(i), pixel_to_vector<double>(img[r-1][c+1])(i), pixel_to_vector<double>(img[r ][c-1])(i), pixel_to_vector<double>(img[r ][c ])(i), pixel_to_vector<double>(img[r ][c+1])(i), pixel_to_vector<double>(img[r+1][c-1])(i), pixel_to_vector<double>(img[r+1][c ])(i), pixel_to_vector<double>(img[r+1][c+1])(i)); typename image_view_type::pixel_type temp; vector_to_pixel(temp, pvout); assign_pixel(result, temp); return true; } private: /* tl tm tr ml mm mr bl bm br */ // The above is the pixel layout in our little 3x3 neighborhood. interpolate() will // fit a quadratic to these 9 pixels and then use that quadratic to find the interpolated // value at point p. inline double interpolate( const dlib::vector<double,2>& p, double tl, double tm, double tr, double ml, double mm, double mr, double bl, double bm, double br ) const { matrix<double,6,1> w; // x w(0) = (tr + mr + br - tl - ml - bl)*0.16666666666; // y w(1) = (bl + bm + br - tl - tm - tr)*0.16666666666; // x^2 w(2) = (tl + tr + ml + mr + bl + br)*0.16666666666 - (tm + mm + bm)*0.333333333; // x*y w(3) = (tl - tr - bl + br)*0.25; // y^2 w(4) = (tl + tm + tr + bl + bm + br)*0.16666666666 - (ml + mm + mr)*0.333333333; // 1 (constant term) w(5) = (tm + ml + mr + bm)*0.222222222 - (tl + tr + bl + br)*0.11111111 + (mm)*0.55555556; const double x = p.x(); const double y = p.y(); matrix<double,6,1> z; z = x, y, x*x, x*y, y*y, 1.0; return dot(w,z); } }; // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- class black_background { public: template <typename pixel_type> void operator() ( pixel_type& p) const { assign_pixel(p, 0); } }; class white_background { public: template <typename pixel_type> void operator() ( pixel_type& p) const { assign_pixel(p, 255); } }; class no_background { public: template <typename pixel_type> void operator() ( pixel_type& ) const { } }; // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename interpolation_type, typename point_mapping_type, typename background_type > void transform_image ( const image_type1& in_img, image_type2& out_img, const interpolation_type& interp, const point_mapping_type& map_point, const background_type& set_background, const rectangle& area ) { // make sure requires clause is not broken DLIB_ASSERT( get_rect(out_img).contains(area) == true && is_same_object(in_img, out_img) == false , "\t void transform_image()" << "\n\t Invalid inputs were given to this function." << "\n\t get_rect(out_img).contains(area): " << get_rect(out_img).contains(area) << "\n\t get_rect(out_img): " << get_rect(out_img) << "\n\t area: " << area << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); const_image_view<image_type1> imgv(in_img); image_view<image_type2> out_imgv(out_img); for (long r = area.top(); r <= area.bottom(); ++r) { for (long c = area.left(); c <= area.right(); ++c) { if (!interp(imgv, map_point(dlib::vector<double,2>(c,r)), out_imgv[r][c])) set_background(out_imgv[r][c]); } } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename interpolation_type, typename point_mapping_type, typename background_type > void transform_image ( const image_type1& in_img, image_type2& out_img, const interpolation_type& interp, const point_mapping_type& map_point, const background_type& set_background ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void transform_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); transform_image(in_img, out_img, interp, map_point, set_background, get_rect(out_img)); } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename interpolation_type, typename point_mapping_type > void transform_image ( const image_type1& in_img, image_type2& out_img, const interpolation_type& interp, const point_mapping_type& map_point ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void transform_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); transform_image(in_img, out_img, interp, map_point, black_background(), get_rect(out_img)); } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename interpolation_type > point_transform_affine rotate_image ( const image_type1& in_img, image_type2& out_img, double angle, const interpolation_type& interp ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t point_transform_affine rotate_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); const rectangle rimg = get_rect(in_img); // figure out bounding box for rotated rectangle rectangle rect; rect += rotate_point(center(rimg), rimg.tl_corner(), -angle); rect += rotate_point(center(rimg), rimg.tr_corner(), -angle); rect += rotate_point(center(rimg), rimg.bl_corner(), -angle); rect += rotate_point(center(rimg), rimg.br_corner(), -angle); set_image_size(out_img, rect.height(), rect.width()); const matrix<double,2,2> R = rotation_matrix(angle); point_transform_affine trans = point_transform_affine(R, -R*dcenter(get_rect(out_img)) + dcenter(rimg)); transform_image(in_img, out_img, interp, trans); return inv(trans); } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > point_transform_affine rotate_image ( const image_type1& in_img, image_type2& out_img, double angle ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t point_transform_affine rotate_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); return rotate_image(in_img, out_img, angle, interpolate_quadratic()); } // ---------------------------------------------------------------------------------------- namespace impl { class helper_resize_image { public: helper_resize_image( double x_scale_, double y_scale_ ): x_scale(x_scale_), y_scale(y_scale_) {} dlib::vector<double,2> operator() ( const dlib::vector<double,2>& p ) const { return dlib::vector<double,2>(p.x()*x_scale, p.y()*y_scale); } private: const double x_scale; const double y_scale; }; } template < typename image_type1, typename image_type2, typename interpolation_type > void resize_image ( const image_type1& in_img, image_type2& out_img, const interpolation_type& interp ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void resize_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); const double x_scale = (num_columns(in_img)-1)/(double)std::max<long>((num_columns(out_img)-1),1); const double y_scale = (num_rows(in_img)-1)/(double)std::max<long>((num_rows(out_img)-1),1); transform_image(in_img, out_img, interp, dlib::impl::helper_resize_image(x_scale,y_scale)); } // ---------------------------------------------------------------------------------------- // This is an optimized version of resize_image for the case where bilinear // interpolation is used. template < typename image_type1, typename image_type2 > typename disable_if_c<(is_rgb_image<image_type1>::value&&is_rgb_image<image_type2>::value) || (is_grayscale_image<image_type1>::value&&is_grayscale_image<image_type2>::value)>::type resize_image ( const image_type1& in_img_, image_type2& out_img_, interpolate_bilinear ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img_, out_img_) == false , "\t void resize_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img_, out_img_): " << is_same_object(in_img_, out_img_) ); const_image_view<image_type1> in_img(in_img_); image_view<image_type2> out_img(out_img_); if (out_img.size() == 0 || in_img.size() == 0) return; typedef typename image_traits<image_type1>::pixel_type T; typedef typename image_traits<image_type2>::pixel_type U; const double x_scale = (in_img.nc()-1)/(double)std::max<long>((out_img.nc()-1),1); const double y_scale = (in_img.nr()-1)/(double)std::max<long>((out_img.nr()-1),1); double y = -y_scale; for (long r = 0; r < out_img.nr(); ++r) { y += y_scale; const long top = static_cast<long>(std::floor(y)); const long bottom = std::min(top+1, in_img.nr()-1); const double tb_frac = y - top; double x = -x_scale; if (pixel_traits<U>::grayscale) { for (long c = 0; c < out_img.nc(); ++c) { x += x_scale; const long left = static_cast<long>(std::floor(x)); const long right = std::min(left+1, in_img.nc()-1); const double lr_frac = x - left; double tl = 0, tr = 0, bl = 0, br = 0; assign_pixel(tl, in_img[top][left]); assign_pixel(tr, in_img[top][right]); assign_pixel(bl, in_img[bottom][left]); assign_pixel(br, in_img[bottom][right]); double temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + tb_frac*((1-lr_frac)*bl + lr_frac*br); assign_pixel(out_img[r][c], temp); } } else { for (long c = 0; c < out_img.nc(); ++c) { x += x_scale; const long left = static_cast<long>(std::floor(x)); const long right = std::min(left+1, in_img.nc()-1); const double lr_frac = x - left; const T tl = in_img[top][left]; const T tr = in_img[top][right]; const T bl = in_img[bottom][left]; const T br = in_img[bottom][right]; T temp; assign_pixel(temp, 0); vector_to_pixel(temp, (1-tb_frac)*((1-lr_frac)*pixel_to_vector<double>(tl) + lr_frac*pixel_to_vector<double>(tr)) + tb_frac*((1-lr_frac)*pixel_to_vector<double>(bl) + lr_frac*pixel_to_vector<double>(br))); assign_pixel(out_img[r][c], temp); } } } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > struct images_have_same_pixel_types { typedef typename image_traits<image_type1>::pixel_type ptype1; typedef typename image_traits<image_type2>::pixel_type ptype2; const static bool value = is_same_type<ptype1, ptype2>::value; }; template < typename image_type, typename image_type2 > typename enable_if_c<is_grayscale_image<image_type>::value && is_grayscale_image<image_type2>::value && images_have_same_pixel_types<image_type,image_type2>::value>::type resize_image ( const image_type& in_img_, image_type2& out_img_, interpolate_bilinear ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img_, out_img_) == false , "\t void resize_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img_, out_img_): " << is_same_object(in_img_, out_img_) ); const_image_view<image_type> in_img(in_img_); image_view<image_type2> out_img(out_img_); if (out_img.size() == 0 || in_img.size() == 0) return; typedef typename image_traits<image_type>::pixel_type T; const double x_scale = (in_img.nc()-1)/(double)std::max<long>((out_img.nc()-1),1); const double y_scale = (in_img.nr()-1)/(double)std::max<long>((out_img.nr()-1),1); double y = -y_scale; for (long r = 0; r < out_img.nr(); ++r) { y += y_scale; const long top = static_cast<long>(std::floor(y)); const long bottom = std::min(top+1, in_img.nr()-1); const double tb_frac = y - top; double x = -4*x_scale; const simd4f _tb_frac = tb_frac; const simd4f _inv_tb_frac = 1-tb_frac; const simd4f _x_scale = 4*x_scale; simd4f _x(x, x+x_scale, x+2*x_scale, x+3*x_scale); long c = 0; for (;; c+=4) { _x += _x_scale; simd4i left = simd4i(_x); simd4f _lr_frac = _x-left; simd4f _inv_lr_frac = 1-_lr_frac; simd4i right = left+1; simd4f tlf = _inv_tb_frac*_inv_lr_frac; simd4f trf = _inv_tb_frac*_lr_frac; simd4f blf = _tb_frac*_inv_lr_frac; simd4f brf = _tb_frac*_lr_frac; int32 fleft[4]; int32 fright[4]; left.store(fleft); right.store(fright); if (fright[3] >= in_img.nc()) break; simd4f tl(in_img[top][fleft[0]], in_img[top][fleft[1]], in_img[top][fleft[2]], in_img[top][fleft[3]]); simd4f tr(in_img[top][fright[0]], in_img[top][fright[1]], in_img[top][fright[2]], in_img[top][fright[3]]); simd4f bl(in_img[bottom][fleft[0]], in_img[bottom][fleft[1]], in_img[bottom][fleft[2]], in_img[bottom][fleft[3]]); simd4f br(in_img[bottom][fright[0]], in_img[bottom][fright[1]], in_img[bottom][fright[2]], in_img[bottom][fright[3]]); simd4f out = simd4f(tlf*tl + trf*tr + blf*bl + brf*br); float fout[4]; out.store(fout); const auto convert_to_output_type = [](float value) { if (std::is_integral<T>::value) return static_cast<T>(value + 0.5); else return static_cast<T>(value); }; out_img[r][c] = convert_to_output_type(fout[0]); out_img[r][c+1] = convert_to_output_type(fout[1]); out_img[r][c+2] = convert_to_output_type(fout[2]); out_img[r][c+3] = convert_to_output_type(fout[3]); } x = -x_scale + c*x_scale; for (; c < out_img.nc(); ++c) { x += x_scale; const long left = static_cast<long>(std::floor(x)); const long right = std::min(left+1, in_img.nc()-1); const float lr_frac = x - left; float tl = 0, tr = 0, bl = 0, br = 0; assign_pixel(tl, in_img[top][left]); assign_pixel(tr, in_img[top][right]); assign_pixel(bl, in_img[bottom][left]); assign_pixel(br, in_img[bottom][right]); float temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + tb_frac*((1-lr_frac)*bl + lr_frac*br); assign_pixel(out_img[r][c], temp); } } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > typename enable_if_c<is_rgb_image<image_type1>::value && is_rgb_image<image_type2>::value >::type resize_image ( const image_type1& in_img_, image_type2& out_img_, interpolate_bilinear ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img_, out_img_) == false , "\t void resize_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img_, out_img_): " << is_same_object(in_img_, out_img_) ); const_image_view<image_type1> in_img(in_img_); image_view<image_type2> out_img(out_img_); if (out_img.size() == 0 || in_img.size() == 0) return; typedef typename image_traits<image_type1>::pixel_type T; const double x_scale = (in_img.nc()-1)/(double)std::max<long>((out_img.nc()-1),1); const double y_scale = (in_img.nr()-1)/(double)std::max<long>((out_img.nr()-1),1); double y = -y_scale; for (long r = 0; r < out_img.nr(); ++r) { y += y_scale; const long top = static_cast<long>(std::floor(y)); const long bottom = std::min(top+1, in_img.nr()-1); const double tb_frac = y - top; double x = -4*x_scale; const simd4f _tb_frac = tb_frac; const simd4f _inv_tb_frac = 1-tb_frac; const simd4f _x_scale = 4*x_scale; simd4f _x(x, x+x_scale, x+2*x_scale, x+3*x_scale); long c = 0; for (;; c+=4) { _x += _x_scale; simd4i left = simd4i(_x); simd4f lr_frac = _x-left; simd4f _inv_lr_frac = 1-lr_frac; simd4i right = left+1; simd4f tlf = _inv_tb_frac*_inv_lr_frac; simd4f trf = _inv_tb_frac*lr_frac; simd4f blf = _tb_frac*_inv_lr_frac; simd4f brf = _tb_frac*lr_frac; int32 fleft[4]; int32 fright[4]; left.store(fleft); right.store(fright); if (fright[3] >= in_img.nc()) break; simd4f tl(in_img[top][fleft[0]].red, in_img[top][fleft[1]].red, in_img[top][fleft[2]].red, in_img[top][fleft[3]].red); simd4f tr(in_img[top][fright[0]].red, in_img[top][fright[1]].red, in_img[top][fright[2]].red, in_img[top][fright[3]].red); simd4f bl(in_img[bottom][fleft[0]].red, in_img[bottom][fleft[1]].red, in_img[bottom][fleft[2]].red, in_img[bottom][fleft[3]].red); simd4f br(in_img[bottom][fright[0]].red, in_img[bottom][fright[1]].red, in_img[bottom][fright[2]].red, in_img[bottom][fright[3]].red); simd4i out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br); int32 fout[4]; out.store(fout); out_img[r][c].red = static_cast<unsigned char>(fout[0]); out_img[r][c+1].red = static_cast<unsigned char>(fout[1]); out_img[r][c+2].red = static_cast<unsigned char>(fout[2]); out_img[r][c+3].red = static_cast<unsigned char>(fout[3]); tl = simd4f(in_img[top][fleft[0]].green, in_img[top][fleft[1]].green, in_img[top][fleft[2]].green, in_img[top][fleft[3]].green); tr = simd4f(in_img[top][fright[0]].green, in_img[top][fright[1]].green, in_img[top][fright[2]].green, in_img[top][fright[3]].green); bl = simd4f(in_img[bottom][fleft[0]].green, in_img[bottom][fleft[1]].green, in_img[bottom][fleft[2]].green, in_img[bottom][fleft[3]].green); br = simd4f(in_img[bottom][fright[0]].green, in_img[bottom][fright[1]].green, in_img[bottom][fright[2]].green, in_img[bottom][fright[3]].green); out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br); out.store(fout); out_img[r][c].green = static_cast<unsigned char>(fout[0]); out_img[r][c+1].green = static_cast<unsigned char>(fout[1]); out_img[r][c+2].green = static_cast<unsigned char>(fout[2]); out_img[r][c+3].green = static_cast<unsigned char>(fout[3]); tl = simd4f(in_img[top][fleft[0]].blue, in_img[top][fleft[1]].blue, in_img[top][fleft[2]].blue, in_img[top][fleft[3]].blue); tr = simd4f(in_img[top][fright[0]].blue, in_img[top][fright[1]].blue, in_img[top][fright[2]].blue, in_img[top][fright[3]].blue); bl = simd4f(in_img[bottom][fleft[0]].blue, in_img[bottom][fleft[1]].blue, in_img[bottom][fleft[2]].blue, in_img[bottom][fleft[3]].blue); br = simd4f(in_img[bottom][fright[0]].blue, in_img[bottom][fright[1]].blue, in_img[bottom][fright[2]].blue, in_img[bottom][fright[3]].blue); out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br); out.store(fout); out_img[r][c].blue = static_cast<unsigned char>(fout[0]); out_img[r][c+1].blue = static_cast<unsigned char>(fout[1]); out_img[r][c+2].blue = static_cast<unsigned char>(fout[2]); out_img[r][c+3].blue = static_cast<unsigned char>(fout[3]); } x = -x_scale + c*x_scale; for (; c < out_img.nc(); ++c) { x += x_scale; const long left = static_cast<long>(std::floor(x)); const long right = std::min(left+1, in_img.nc()-1); const double lr_frac = x - left; const T tl = in_img[top][left]; const T tr = in_img[top][right]; const T bl = in_img[bottom][left]; const T br = in_img[bottom][right]; T temp; assign_pixel(temp, 0); vector_to_pixel(temp, (1-tb_frac)*((1-lr_frac)*pixel_to_vector<double>(tl) + lr_frac*pixel_to_vector<double>(tr)) + tb_frac*((1-lr_frac)*pixel_to_vector<double>(bl) + lr_frac*pixel_to_vector<double>(br))); assign_pixel(out_img[r][c], temp); } } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > void resize_image ( const image_type1& in_img, image_type2& out_img ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void resize_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); resize_image(in_img, out_img, interpolate_bilinear()); } // ---------------------------------------------------------------------------------------- template < typename image_type > void resize_image ( double size_scale, image_type& img ) { // make sure requires clause is not broken DLIB_ASSERT( size_scale > 0 , "\t void resize_image()" << "\n\t Invalid inputs were given to this function." << "\n\t size_scale: " << size_scale ); if (size_scale == 1.0) return; // no need to do anything at all image_type temp; set_image_size(temp, std::round(size_scale*num_rows(img)), std::round(size_scale*num_columns(img))); resize_image(img, temp); swap(img, temp); } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > point_transform_affine flip_image_left_right ( const image_type1& in_img, image_type2& out_img ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void flip_image_left_right()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); assign_image(out_img, fliplr(mat(in_img))); std::vector<dlib::vector<double,2> > from, to; rectangle r = get_rect(in_img); from.push_back(r.tl_corner()); to.push_back(r.tr_corner()); from.push_back(r.bl_corner()); to.push_back(r.br_corner()); from.push_back(r.tr_corner()); to.push_back(r.tl_corner()); from.push_back(r.br_corner()); to.push_back(r.bl_corner()); return find_affine_transform(from,to); } // ---------------------------------------------------------------------------------------- template < typename image_type > point_transform_affine flip_image_left_right ( image_type& img ) { image_type temp; auto tform = flip_image_left_right(img, temp); swap(temp,img); return tform; } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > void flip_image_up_down ( const image_type1& in_img, image_type2& out_img ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void flip_image_up_down()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); assign_image(out_img, flipud(mat(in_img))); } // ---------------------------------------------------------------------------------------- namespace impl { inline rectangle flip_rect_left_right ( const rectangle& rect, const rectangle& window ) { rectangle temp; temp.top() = rect.top(); temp.bottom() = rect.bottom(); const long left_dist = rect.left()-window.left(); temp.right() = window.right()-left_dist; temp.left() = temp.right()-rect.width()+1; return temp; } inline rectangle tform_object ( const rectangle_transform& tran, const rectangle& rect ) { return tran(rect); } inline mmod_rect tform_object ( const rectangle_transform& tran, mmod_rect rect ) { rect.rect = tform_object(tran, rect.rect); return rect; } inline full_object_detection tform_object( const point_transform_affine& tran, const full_object_detection& obj ) { std::vector<point> parts; parts.reserve(obj.num_parts()); for (unsigned long i = 0; i < obj.num_parts(); ++i) { if (obj.part(i) != OBJECT_PART_NOT_PRESENT) parts.push_back(tran(obj.part(i))); else parts.push_back(OBJECT_PART_NOT_PRESENT); } return full_object_detection(tform_object(tran,obj.get_rect()), parts); } } // ---------------------------------------------------------------------------------------- template < typename image_array_type, typename T > void add_image_left_right_flips ( image_array_type& images, std::vector<std::vector<T> >& objects ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size(), "\t void add_image_left_right_flips()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() ); typename image_array_type::value_type temp; std::vector<T> rects; const unsigned long num = images.size(); for (unsigned long j = 0; j < num; ++j) { const point_transform_affine tran = flip_image_left_right(images[j], temp); rects.clear(); for (unsigned long i = 0; i < objects[j].size(); ++i) rects.push_back(impl::tform_object(tran, objects[j][i])); images.push_back(std::move(temp)); objects.push_back(rects); } } // ---------------------------------------------------------------------------------------- template < typename image_array_type, typename T, typename U > void add_image_left_right_flips ( image_array_type& images, std::vector<std::vector<T> >& objects, std::vector<std::vector<U> >& objects2 ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size() && images.size() == objects2.size(), "\t void add_image_left_right_flips()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() << "\n\t objects2.size(): " << objects2.size() ); typename image_array_type::value_type temp; std::vector<T> rects; std::vector<U> rects2; const unsigned long num = images.size(); for (unsigned long j = 0; j < num; ++j) { const point_transform_affine tran = flip_image_left_right(images[j], temp); images.push_back(std::move(temp)); rects.clear(); for (unsigned long i = 0; i < objects[j].size(); ++i) rects.push_back(impl::tform_object(tran, objects[j][i])); objects.push_back(rects); rects2.clear(); for (unsigned long i = 0; i < objects2[j].size(); ++i) rects2.push_back(impl::tform_object(tran, objects2[j][i])); objects2.push_back(rects2); } } // ---------------------------------------------------------------------------------------- template <typename image_array_type> void flip_image_dataset_left_right ( image_array_type& images, std::vector<std::vector<rectangle> >& objects ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size(), "\t void flip_image_dataset_left_right()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() ); typename image_array_type::value_type temp; for (unsigned long i = 0; i < images.size(); ++i) { flip_image_left_right(images[i], temp); swap(temp,images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { objects[i][j] = impl::flip_rect_left_right(objects[i][j], get_rect(images[i])); } } } // ---------------------------------------------------------------------------------------- template <typename image_array_type> void flip_image_dataset_left_right ( image_array_type& images, std::vector<std::vector<rectangle> >& objects, std::vector<std::vector<rectangle> >& objects2 ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size() && images.size() == objects2.size(), "\t void flip_image_dataset_left_right()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() << "\n\t objects2.size(): " << objects2.size() ); typename image_array_type::value_type temp; for (unsigned long i = 0; i < images.size(); ++i) { flip_image_left_right(images[i], temp); swap(temp, images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { objects[i][j] = impl::flip_rect_left_right(objects[i][j], get_rect(images[i])); } for (unsigned long j = 0; j < objects2[i].size(); ++j) { objects2[i][j] = impl::flip_rect_left_right(objects2[i][j], get_rect(images[i])); } } } // ---------------------------------------------------------------------------------------- template < typename pyramid_type, typename image_array_type > void upsample_image_dataset ( image_array_type& images, std::vector<std::vector<rectangle> >& objects, unsigned long max_image_size = std::numeric_limits<unsigned long>::max() ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size(), "\t void upsample_image_dataset()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() ); typename image_array_type::value_type temp; pyramid_type pyr; for (unsigned long i = 0; i < images.size(); ++i) { const unsigned long img_size = num_rows(images[i])*num_columns(images[i]); if (img_size <= max_image_size) { pyramid_up(images[i], temp, pyr); swap(temp, images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { objects[i][j] = pyr.rect_up(objects[i][j]); } } } } template < typename pyramid_type, typename image_array_type > void upsample_image_dataset ( image_array_type& images, std::vector<std::vector<mmod_rect>>& objects, unsigned long max_image_size = std::numeric_limits<unsigned long>::max() ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size(), "\t void upsample_image_dataset()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() ); typename image_array_type::value_type temp; pyramid_type pyr; for (unsigned long i = 0; i < images.size(); ++i) { const unsigned long img_size = num_rows(images[i])*num_columns(images[i]); if (img_size <= max_image_size) { pyramid_up(images[i], temp, pyr); swap(temp, images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { objects[i][j].rect = pyr.rect_up(objects[i][j].rect); } } } } template < typename pyramid_type, typename image_array_type > void upsample_image_dataset ( image_array_type& images, std::vector<std::vector<rectangle> >& objects, std::vector<std::vector<rectangle> >& objects2, unsigned long max_image_size = std::numeric_limits<unsigned long>::max() ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size() && images.size() == objects2.size(), "\t void upsample_image_dataset()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() << "\n\t objects2.size(): " << objects2.size() ); typename image_array_type::value_type temp; pyramid_type pyr; for (unsigned long i = 0; i < images.size(); ++i) { const unsigned long img_size = num_rows(images[i])*num_columns(images[i]); if (img_size <= max_image_size) { pyramid_up(images[i], temp, pyr); swap(temp, images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { objects[i][j] = pyr.rect_up(objects[i][j]); } for (unsigned long j = 0; j < objects2[i].size(); ++j) { objects2[i][j] = pyr.rect_up(objects2[i][j]); } } } } // ---------------------------------------------------------------------------------------- template <typename image_array_type> void rotate_image_dataset ( double angle, image_array_type& images, std::vector<std::vector<rectangle> >& objects ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size(), "\t void rotate_image_dataset()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() ); typename image_array_type::value_type temp; for (unsigned long i = 0; i < images.size(); ++i) { const rectangle_transform tran = rotate_image(images[i], temp, angle); swap(temp, images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { const rectangle rect = objects[i][j]; objects[i][j] = tran(rect); } } } template <typename image_array_type> void rotate_image_dataset ( double angle, image_array_type& images, std::vector<std::vector<rectangle> >& objects, std::vector<std::vector<rectangle> >& objects2 ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size() && images.size() == objects2.size(), "\t void rotate_image_dataset()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() << "\n\t objects2.size(): " << objects2.size() ); typename image_array_type::value_type temp; for (unsigned long i = 0; i < images.size(); ++i) { const rectangle_transform tran = rotate_image(images[i], temp, angle); swap(temp, images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { const rectangle rect = objects[i][j]; objects[i][j] = tran(rect); } for (unsigned long j = 0; j < objects2[i].size(); ++j) { const rectangle rect = objects2[i][j]; objects2[i][j] = tran(rect); } } } // ---------------------------------------------------------------------------------------- template < typename image_array_type, typename EXP, typename T, typename U > void add_image_rotations ( const matrix_exp<EXP>& angles, image_array_type& images, std::vector<std::vector<T> >& objects, std::vector<std::vector<U> >& objects2 ) { // make sure requires clause is not broken DLIB_ASSERT( is_vector(angles) && angles.size() > 0 && images.size() == objects.size() && images.size() == objects2.size(), "\t void add_image_rotations()" << "\n\t Invalid inputs were given to this function." << "\n\t is_vector(angles): " << is_vector(angles) << "\n\t angles.size(): " << angles.size() << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() << "\n\t objects2.size(): " << objects2.size() ); using namespace impl; image_array_type new_images(images.size() * angles.size()); std::vector<std::vector<T>> new_objects(images.size() * angles.size()); std::vector<std::vector<U>> new_objects2(images.size() * angles.size()); dlib::parallel_for(0, images.size(), [&](long j) { typename image_array_type::value_type temp; long dst_base = j * angles.size(); for (long i = 0; i < angles.size(); ++i) { long dst = dst_base + i; const point_transform_affine tran = rotate_image(images[j], temp, angles(i)); exchange(new_images[dst], temp); for (unsigned long k = 0; k < objects[j].size(); ++k) new_objects[dst].push_back(tform_object(tran, objects[j][k])); for (unsigned long k = 0; k < objects2[j].size(); ++k) new_objects2[dst].push_back(tform_object(tran, objects2[j][k])); } }); new_images.swap(images); new_objects.swap(objects); new_objects2.swap(objects2); } // ---------------------------------------------------------------------------------------- template < typename image_array_type, typename EXP, typename T > void add_image_rotations ( const matrix_exp<EXP>& angles, image_array_type& images, std::vector<std::vector<T> >& objects ) { std::vector<std::vector<T> > objects2(objects.size()); add_image_rotations(angles, images, objects, objects2); } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename pyramid_type, typename interpolation_type > void pyramid_up ( const image_type1& in_img, image_type2& out_img, const pyramid_type& pyr, const interpolation_type& interp ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void pyramid_up()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); if (image_size(in_img) == 0) { set_image_size(out_img, 0, 0); return; } rectangle rect = get_rect(in_img); rectangle uprect = pyr.rect_up(rect); if (uprect.is_empty()) { set_image_size(out_img, 0, 0); return; } set_image_size(out_img, uprect.bottom()+1, uprect.right()+1); resize_image(in_img, out_img, interp); } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename pyramid_type > void pyramid_up ( const image_type1& in_img, image_type2& out_img, const pyramid_type& pyr ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void pyramid_up()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); pyramid_up(in_img, out_img, pyr, interpolate_bilinear()); } // ---------------------------------------------------------------------------------------- template < typename image_type, typename pyramid_type > void pyramid_up ( image_type& img, const pyramid_type& pyr ) { image_type temp; pyramid_up(img, temp, pyr); swap(temp, img); } // ---------------------------------------------------------------------------------------- template < typename image_type > void pyramid_up ( image_type& img ) { pyramid_down<2> pyr; pyramid_up(img, pyr); } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- struct chip_dims { chip_dims ( unsigned long rows_, unsigned long cols_ ) : rows(rows_), cols(cols_) { } unsigned long rows; unsigned long cols; }; struct chip_details { chip_details() : angle(0), rows(0), cols(0) {} chip_details(const rectangle& rect_) : rect(rect_),angle(0), rows(rect_.height()), cols(rect_.width()) {} chip_details(const drectangle& rect_) : rect(rect_),angle(0), rows((unsigned long)(rect_.height()+0.5)), cols((unsigned long)(rect_.width()+0.5)) {} chip_details(const drectangle& rect_, unsigned long size) : rect(rect_),angle(0) { compute_dims_from_size(size); } chip_details(const drectangle& rect_, unsigned long size, double angle_) : rect(rect_),angle(angle_) { compute_dims_from_size(size); } chip_details(const drectangle& rect_, const chip_dims& dims) : rect(rect_),angle(0),rows(dims.rows), cols(dims.cols) {} chip_details(const drectangle& rect_, const chip_dims& dims, double angle_) : rect(rect_),angle(angle_),rows(dims.rows), cols(dims.cols) {} template <typename T> chip_details( const std::vector<dlib::vector<T,2> >& chip_points, const std::vector<dlib::vector<T,2> >& img_points, const chip_dims& dims ) : rows(dims.rows), cols(dims.cols) { DLIB_CASSERT( chip_points.size() == img_points.size() && chip_points.size() >= 2, "\t chip_details::chip_details(chip_points,img_points,dims)" << "\n\t Invalid inputs were given to this function." << "\n\t chip_points.size(): " << chip_points.size() << "\n\t img_points.size(): " << img_points.size() ); const point_transform_affine tform = find_similarity_transform(chip_points,img_points); dlib::vector<double,2> p(1,0); p = tform.get_m()*p; // There are only 3 things happening in a similarity transform. There is a // rescaling, a rotation, and a translation. So here we pick out the scale and // rotation parameters. angle = std::atan2(p.y(),p.x()); // Note that the translation and scale part are represented by the extraction // rectangle. So here we build the appropriate rectangle. const double scale = length(p); rect = centered_drect(tform(point(dims.cols,dims.rows)/2.0), dims.cols*scale, dims.rows*scale); } drectangle rect; double angle; unsigned long rows; unsigned long cols; inline unsigned long size() const { return rows*cols; } private: void compute_dims_from_size ( unsigned long size ) { if (rect.is_empty()) { cols = rows = std::round(std::sqrt((double)size)); } else { const double relative_size = std::sqrt(size/(double)rect.area()); rows = static_cast<unsigned long>(rect.height()*relative_size + 0.5); cols = static_cast<unsigned long>(size/(double)rows + 0.5); rows = std::max(1ul,rows); cols = std::max(1ul,cols); } } }; // ---------------------------------------------------------------------------------------- inline point_transform_affine get_mapping_to_chip ( const chip_details& details ) { std::vector<dlib::vector<double,2> > from, to; point p1(0,0); point p2(details.cols-1,0); point p3(details.cols-1, details.rows-1); to.push_back(p1); from.push_back(rotate_point<double>(center(details.rect),details.rect.tl_corner(),details.angle)); to.push_back(p2); from.push_back(rotate_point<double>(center(details.rect),details.rect.tr_corner(),details.angle)); to.push_back(p3); from.push_back(rotate_point<double>(center(details.rect),details.rect.br_corner(),details.angle)); return find_affine_transform(from, to); } // ---------------------------------------------------------------------------------------- inline full_object_detection map_det_to_chip( const full_object_detection& det, const chip_details& details ) { point_transform_affine tform = get_mapping_to_chip(details); full_object_detection res(det); // map the parts for (unsigned long l = 0; l < det.num_parts(); ++l) { if (det.part(l) != OBJECT_PART_NOT_PRESENT) res.part(l) = tform(det.part(l)); else res.part(l) = OBJECT_PART_NOT_PRESENT; } // map the main rectangle rectangle rect; rect += tform(det.get_rect().tl_corner()); rect += tform(det.get_rect().tr_corner()); rect += tform(det.get_rect().bl_corner()); rect += tform(det.get_rect().br_corner()); res.get_rect() = rect; return res; } // ---------------------------------------------------------------------------------------- namespace impl { template < typename image_type1, typename image_type2 > void basic_extract_image_chip ( const image_type1& img, const rectangle& location, image_type2& chip ) /*! ensures - This function doesn't do any scaling or rotating. It just pulls out the chip in the given rectangle. This also means the output image has the same dimensions as the location rectangle. !*/ { const_image_view<image_type1> vimg(img); image_view<image_type2> vchip(chip); vchip.set_size(location.height(), location.width()); // location might go outside img so clip it rectangle area = location.intersect(get_rect(img)); // find the part of the chip that corresponds to area in img. rectangle chip_area = translate_rect(area, -location.tl_corner()); zero_border_pixels(chip, chip_area); // now pull out the contents of area/chip_area. for (long r = chip_area.top(), rr = area.top(); r <= chip_area.bottom(); ++r,++rr) { for (long c = chip_area.left(), cc = area.left(); c <= chip_area.right(); ++c,++cc) { assign_pixel(vchip[r][c], vimg[rr][cc]); } } } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename interpolation_type > void extract_image_chips ( const image_type1& img, const std::vector<chip_details>& chip_locations, dlib::array<image_type2>& chips, const interpolation_type& interp ) { // make sure requires clause is not broken #ifdef ENABLE_ASSERTS for (unsigned long i = 0; i < chip_locations.size(); ++i) { DLIB_CASSERT(chip_locations[i].size() != 0 && chip_locations[i].rect.is_empty() == false, "\t void extract_image_chips()" << "\n\t Invalid inputs were given to this function." << "\n\t chip_locations["<<i<<"].size(): " << chip_locations[i].size() << "\n\t chip_locations["<<i<<"].rect.is_empty(): " << chip_locations[i].rect.is_empty() ); } #endif // If nearest-neighbor interpolation is wanted, then don't use an image pyramid. constexpr bool image_pyramid_enabled = !std::is_same< typename std::remove_const<typename std::remove_reference<decltype(interp)>::type>::type, interpolate_nearest_neighbor >::value; pyramid_down<2> pyr; long max_depth = 0; // If the chip is supposed to be much smaller than the source subwindow then you // can't just extract it using bilinear interpolation since at a high enough // downsampling amount it would effectively turn into nearest neighbor // interpolation. So we use an image pyramid to make sure the interpolation is // fast but also high quality. The first thing we do is figure out how deep the // image pyramid needs to be. rectangle bounding_box; for (unsigned long i = 0; i < chip_locations.size(); ++i) { long depth = 0; double grow = 2; drectangle rect = pyr.rect_down(chip_locations[i].rect); while (rect.area() > chip_locations[i].size() && image_pyramid_enabled) { rect = pyr.rect_down(rect); ++depth; // We drop the image size by a factor of 2 each iteration and then assume a // border of 2 pixels is needed to avoid any border effects of the crop. grow = grow*2 + 2; } drectangle rot_rect; const vector<double,2> cent = center(chip_locations[i].rect); rot_rect += rotate_point<double>(cent,chip_locations[i].rect.tl_corner(),chip_locations[i].angle); rot_rect += rotate_point<double>(cent,chip_locations[i].rect.tr_corner(),chip_locations[i].angle); rot_rect += rotate_point<double>(cent,chip_locations[i].rect.bl_corner(),chip_locations[i].angle); rot_rect += rotate_point<double>(cent,chip_locations[i].rect.br_corner(),chip_locations[i].angle); bounding_box += grow_rect(rot_rect, grow).intersect(get_rect(img)); max_depth = std::max(depth,max_depth); } //std::cout << "max_depth: " << max_depth << std::endl; //std::cout << "crop amount: " << bounding_box.area()/(double)get_rect(img).area() << std::endl; // now make an image pyramid dlib::array<array2d<typename image_traits<image_type1>::pixel_type> > levels(max_depth); if (levels.size() != 0) pyr(sub_image(img,bounding_box),levels[0]); for (unsigned long i = 1; i < levels.size(); ++i) pyr(levels[i-1],levels[i]); std::vector<dlib::vector<double,2> > from, to; // now pull out the chips chips.resize(chip_locations.size()); for (unsigned long i = 0; i < chips.size(); ++i) { // If the chip doesn't have any rotation or scaling then use the basic version // of chip extraction that just does a fast copy. if (chip_locations[i].angle == 0 && chip_locations[i].rows == chip_locations[i].rect.height() && chip_locations[i].cols == chip_locations[i].rect.width()) { impl::basic_extract_image_chip(img, chip_locations[i].rect, chips[i]); } else { set_image_size(chips[i], chip_locations[i].rows, chip_locations[i].cols); // figure out which level in the pyramid to use to extract the chip int level = -1; drectangle rect = translate_rect(chip_locations[i].rect, -bounding_box.tl_corner()); while (pyr.rect_down(rect).area() > chip_locations[i].size() && image_pyramid_enabled) { ++level; rect = pyr.rect_down(rect); } // find the appropriate transformation that maps from the chip to the input // image from.clear(); to.clear(); from.push_back(get_rect(chips[i]).tl_corner()); to.push_back(rotate_point<double>(center(rect),rect.tl_corner(),chip_locations[i].angle)); from.push_back(get_rect(chips[i]).tr_corner()); to.push_back(rotate_point<double>(center(rect),rect.tr_corner(),chip_locations[i].angle)); from.push_back(get_rect(chips[i]).bl_corner()); to.push_back(rotate_point<double>(center(rect),rect.bl_corner(),chip_locations[i].angle)); point_transform_affine trns = find_affine_transform(from,to); // now extract the actual chip if (level == -1) transform_image(sub_image(img,bounding_box),chips[i],interp,trns); else transform_image(levels[level],chips[i],interp,trns); } } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > void extract_image_chips( const image_type1& img, const std::vector<chip_details>& chip_locations, dlib::array<image_type2>& chips ) { extract_image_chips(img, chip_locations, chips, interpolate_bilinear()); } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename interpolation_type > void extract_image_chip ( const image_type1& img, const chip_details& location, image_type2& chip, const interpolation_type& interp ) { // If the chip doesn't have any rotation or scaling then use the basic version of // chip extraction that just does a fast copy. if (location.angle == 0 && location.rows == location.rect.height() && location.cols == location.rect.width()) { impl::basic_extract_image_chip(img, location.rect, chip); } else { std::vector<chip_details> chip_locations(1,location); dlib::array<image_type2> chips; extract_image_chips(img, chip_locations, chips, interp); swap(chips[0], chip); } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > void extract_image_chip ( const image_type1& img, const chip_details& location, image_type2& chip ) { extract_image_chip(img, location, chip, interpolate_bilinear()); } // ---------------------------------------------------------------------------------------- inline chip_details get_face_chip_details ( const full_object_detection& det, const unsigned long size = 200, const double padding = 0.2 ) { DLIB_CASSERT(det.num_parts() == 68 || det.num_parts() == 5, "\t chip_details get_face_chip_details()" << "\n\t You have to give either a 5 point or 68 point face landmarking output to this function. " << "\n\t det.num_parts(): " << det.num_parts() ); DLIB_CASSERT(padding >= 0 && size > 0, "\t chip_details get_face_chip_details()" << "\n\t Invalid inputs were given to this function." << "\n\t padding: " << padding << "\n\t size: " << size ); std::vector<dpoint> from_points, to_points; if (det.num_parts() == 5) { dpoint p0(0.8595674595992, 0.2134981538014); dpoint p1(0.6460604764104, 0.2289674387677); dpoint p2(0.1205750620789, 0.2137274526848); dpoint p3(0.3340850613712, 0.2290642403242); dpoint p4(0.4901123135679, 0.6277975316475); p0 = (padding+p0)/(2*padding+1); p1 = (padding+p1)/(2*padding+1); p2 = (padding+p2)/(2*padding+1); p3 = (padding+p3)/(2*padding+1); p4 = (padding+p4)/(2*padding+1); from_points.push_back(p0*size); to_points.push_back(det.part(0)); from_points.push_back(p1*size); to_points.push_back(det.part(1)); from_points.push_back(p2*size); to_points.push_back(det.part(2)); from_points.push_back(p3*size); to_points.push_back(det.part(3)); from_points.push_back(p4*size); to_points.push_back(det.part(4)); } else { // Average positions of face points 17-67 const double mean_face_shape_x[] = { 0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124, 0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036, 0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918, 0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149, 0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721, 0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, 0.553364, 0.490127, 0.42689 }; const double mean_face_shape_y[] = { 0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891, 0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326, 0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733, 0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099, 0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805, 0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746, 0.784792, 0.824182, 0.831803, 0.824182 }; COMPILE_TIME_ASSERT(sizeof(mean_face_shape_x)/sizeof(double) == 68-17); for (unsigned long i = 17; i < det.num_parts(); ++i) { // Ignore the lower lip if ((55 <= i && i <= 59) || (65 <= i && i <= 67)) continue; // Ignore the eyebrows if (17 <= i && i <= 26) continue; dpoint p; p.x() = (padding+mean_face_shape_x[i-17])/(2*padding+1); p.y() = (padding+mean_face_shape_y[i-17])/(2*padding+1); from_points.push_back(p*size); to_points.push_back(det.part(i)); } } return chip_details(from_points, to_points, chip_dims(size,size)); } // ---------------------------------------------------------------------------------------- inline std::vector<chip_details> get_face_chip_details ( const std::vector<full_object_detection>& dets, const unsigned long size = 200, const double padding = 0.2 ) { std::vector<chip_details> res; res.reserve(dets.size()); for (unsigned long i = 0; i < dets.size(); ++i) res.push_back(get_face_chip_details(dets[i], size, padding)); return res; } // ---------------------------------------------------------------------------------------- template < typename image_type > void extract_image_4points ( const image_type& img_, image_type& out_, const std::array<dpoint,4>& pts ) { const_image_view<image_type> img(img_); image_view<image_type> out(out_); if (out.size() == 0) return; drectangle bounding_box; for (auto& p : pts) bounding_box += p; const std::array<dpoint,4> corners = {{bounding_box.tl_corner(), bounding_box.tr_corner(), bounding_box.bl_corner(), bounding_box.br_corner()}}; matrix<double> dists(4,4); for (long r = 0; r < dists.nr(); ++r) { for (long c = 0; c < dists.nc(); ++c) { dists(r,c) = length_squared(corners[r] - pts[c]); } } matrix<long long> idists = matrix_cast<long long>(-round(std::numeric_limits<long long>::max()*(dists/max(dists)))); const drectangle area = get_rect(out); std::vector<dpoint> from_points = {area.tl_corner(), area.tr_corner(), area.bl_corner(), area.br_corner()}; // find the assignment of corners to pts auto assignment = max_cost_assignment(idists); std::vector<dpoint> to_points(4); for (size_t i = 0; i < assignment.size(); ++i) to_points[i] = pts[assignment[i]]; auto tform = find_projective_transform(from_points, to_points); transform_image(img_, out_, interpolate_bilinear(), tform); } template < typename image_type > void extract_image_4points ( const image_type& img, image_type& out, const std::array<line,4>& lines ) { extract_image_4points(img, out, find_convex_quadrilateral(lines)); } // ---------------------------------------------------------------------------------------- template < typename image_type > image_type jitter_image( const image_type& img, dlib::rand& rnd ) { DLIB_CASSERT(num_rows(img)*num_columns(img) != 0); DLIB_CASSERT(num_rows(img)==num_columns(img)); const double max_rotation_degrees = 3; const double min_object_height = 0.97; const double max_object_height = 0.99999; const double translate_amount = 0.02; const auto rect = shrink_rect(get_rect(img),3); // perturb the location of the crop by a small fraction of the object's size. const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width(), rnd.get_double_in_range(-translate_amount,translate_amount)*rect.height()); // perturb the scale of the crop by a fraction of the object's size const double rand_scale_perturb = rnd.get_double_in_range(min_object_height, max_object_height); const long box_size = rect.height()/rand_scale_perturb; const auto crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size); const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180; image_type crop; extract_image_chip(img, chip_details(crop_rect, chip_dims(num_rows(img),num_columns(img)), angle), crop); if (rnd.get_random_double() > 0.5) flip_image_left_right(crop); return crop; } // ---------------------------------------------------------------------------------------- } #endif // DLIB_INTERPOlATIONh_