// Copyright (C) 2016 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_RaNDOM_CROPPER_H_ #define DLIB_RaNDOM_CROPPER_H_ #include "random_cropper_abstract.h" #include "../threads.h" #include <mutex> #include <vector> #include "interpolation.h" #include "../image_processing/full_object_detection.h" #include "../rand.h" namespace dlib { class random_cropper { chip_dims dims = chip_dims(300,300); bool randomly_flip = true; double max_rotation_degrees = 30; long min_object_length_long_dim = 75; // cropped object will be at least this many pixels along its longest edge. long min_object_length_short_dim = 30; // cropped object will be at least this many pixels along its shortest edge. double max_object_size = 0.7; // cropped object will be at most this fraction of the size of the image. double background_crops_fraction = 0.5; double translate_amount = 0.10; std::mutex rnd_mutex; dlib::rand rnd; public: void set_seed ( time_t seed ) { rnd = dlib::rand(seed); } double get_translate_amount ( ) const { return translate_amount; } void set_translate_amount ( double value ) { DLIB_CASSERT(0 <= value); translate_amount = value; } double get_background_crops_fraction ( ) const { return background_crops_fraction; } void set_background_crops_fraction ( double value ) { DLIB_CASSERT(0 <= value && value <= 1); background_crops_fraction = value; } const chip_dims& get_chip_dims( ) const { return dims; } void set_chip_dims ( const chip_dims& dims_ ) { dims = dims_; } void set_chip_dims ( unsigned long rows, unsigned long cols ) { set_chip_dims(chip_dims(rows,cols)); } bool get_randomly_flip ( ) const { return randomly_flip; } void set_randomly_flip ( bool value ) { randomly_flip = value; } double get_max_rotation_degrees ( ) const { return max_rotation_degrees; } void set_max_rotation_degrees ( double value ) { max_rotation_degrees = std::abs(value); } long get_min_object_length_long_dim ( ) const { return min_object_length_long_dim; } long get_min_object_length_short_dim ( ) const { return min_object_length_short_dim; } void set_min_object_size ( long long_dim, long short_dim ) { DLIB_CASSERT(0 < short_dim && short_dim <= long_dim); min_object_length_long_dim = long_dim; min_object_length_short_dim = short_dim; } double get_max_object_size ( ) const { return max_object_size; } void set_max_object_size ( double value ) { DLIB_CASSERT(0 < value); max_object_size = value; } template < typename array_type > void operator() ( size_t num_crops, const array_type& images, const std::vector<std::vector<mmod_rect>>& rects, array_type& crops, std::vector<std::vector<mmod_rect>>& crop_rects ) { DLIB_CASSERT(images.size() == rects.size()); crops.clear(); crop_rects.clear(); append(num_crops, images, rects, crops, crop_rects); } template < typename array_type > void append ( size_t num_crops, const array_type& images, const std::vector<std::vector<mmod_rect>>& rects, array_type& crops, std::vector<std::vector<mmod_rect>>& crop_rects ) { DLIB_CASSERT(images.size() == rects.size()); DLIB_CASSERT(crops.size() == crop_rects.size()); auto original_size = crops.size(); crops.resize(crops.size()+num_crops); crop_rects.resize(crop_rects.size()+num_crops); parallel_for(original_size, original_size+num_crops, [&](long i) { (*this)(images, rects, crops[i], crop_rects[i]); }); } template < typename array_type, typename image_type > void operator() ( const array_type& images, const std::vector<std::vector<mmod_rect>>& rects, image_type& crop, std::vector<mmod_rect>& crop_rects ) { DLIB_CASSERT(images.size() == rects.size()); size_t idx; { std::lock_guard<std::mutex> lock(rnd_mutex); idx = rnd.get_integer(images.size()); } (*this)(images[idx], rects[idx], crop, crop_rects); } template < typename image_type1 > image_type1 operator() ( const image_type1& img ) { image_type1 crop; std::vector<mmod_rect> junk1, junk2; (*this)(img, junk1, crop, junk2); return crop; } template < typename image_type1, typename image_type2 > void operator() ( const image_type1& img, const std::vector<mmod_rect>& rects, image_type2& crop, std::vector<mmod_rect>& crop_rects ) { DLIB_CASSERT(num_rows(img)*num_columns(img) != 0); chip_details crop_plan; bool should_flip_crop; make_crop_plan(img, rects, crop_plan, should_flip_crop); extract_image_chip(img, crop_plan, crop); const rectangle_transform tform = get_mapping_to_chip(crop_plan); // copy rects into crop_rects and set ones that are outside the crop to ignore or // drop entirely as appropriate. crop_rects.clear(); for (auto rect : rects) { // map to crop rect.rect = tform(rect.rect); // if the rect is at least partly in the crop if (get_rect(crop).intersect(rect.rect).area() != 0) { // set to ignore if not totally in the crop or if too small. if (!get_rect(crop).contains(rect.rect) || ((long)rect.rect.height() < min_object_length_long_dim && (long)rect.rect.width() < min_object_length_long_dim) || ((long)rect.rect.height() < min_object_length_short_dim || (long)rect.rect.width() < min_object_length_short_dim)) { rect.ignore = true; } crop_rects.push_back(rect); } } // Also randomly flip the image if (should_flip_crop) { image_type2 temp; flip_image_left_right(crop, temp); swap(crop,temp); for (auto&& rect : crop_rects) rect.rect = impl::flip_rect_left_right(rect.rect, get_rect(crop)); } } private: template <typename image_type1> void make_crop_plan ( const image_type1& img, const std::vector<mmod_rect>& rects, chip_details& crop_plan, bool& should_flip_crop ) { std::lock_guard<std::mutex> lock(rnd_mutex); rectangle crop_rect; if (has_non_ignored_box(rects) && rnd.get_random_double() >= background_crops_fraction) { auto rect = rects[randomly_pick_rect(rects)].rect; // perturb the location of the crop by a small fraction of the object's size. const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()), rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width())); // We are going to grow rect into the cropping rect. First, we grow it a // little so that it has the desired minimum border around it. drectangle drect = centered_drect(center(rect)+rand_translate, rect.width()/max_object_size, rect.height()/max_object_size); // Now make rect have the same aspect ratio as dims so that there won't be // any funny stretching when we crop it. We do this by growing it along // whichever dimension is too short. const double target_aspect = dims.cols/(double)dims.rows; if (drect.width()/drect.height() < target_aspect) drect = centered_drect(drect, target_aspect*drect.height(), drect.height()); else drect = centered_drect(drect, drect.width(), drect.width()/target_aspect); // Now perturb the scale of the crop. We do this by shrinking it, but not // so much that it gets smaller than the min object sizes require. double current_width = dims.cols*rect.width()/drect.width(); double current_height = dims.rows*rect.height()/drect.height(); // never make any dimension smaller than the short dim. double min_scale1 = std::max(min_object_length_short_dim/current_width, min_object_length_short_dim/current_height); // at least one dimension needs to be longer than the long dim. double min_scale2 = std::min(min_object_length_long_dim/current_width, min_object_length_long_dim/current_height); double min_scale = std::max(min_scale1, min_scale2); const double rand_scale_perturb = 1.0/rnd.get_double_in_range(min_scale, 1); crop_rect = centered_drect(drect, drect.width()*rand_scale_perturb, drect.height()*rand_scale_perturb); } else { crop_rect = make_random_cropping_rect(img); } should_flip_crop = randomly_flip && rnd.get_random_double() > 0.5; const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180; crop_plan = chip_details(crop_rect, dims, angle); } bool has_non_ignored_box ( const std::vector<mmod_rect>& rects ) const { for (auto&& b : rects) { if (!b.ignore) return true; } return false; } size_t randomly_pick_rect ( const std::vector<mmod_rect>& rects ) { DLIB_CASSERT(has_non_ignored_box(rects)); size_t idx = rnd.get_integer(rects.size()); while(rects[idx].ignore) idx = rnd.get_integer(rects.size()); return idx; } template <typename image_type> rectangle make_random_cropping_rect( const image_type& img_ ) { const_image_view<image_type> img(img_); // Figure out what rectangle we want to crop from the image. We are going to // crop out an image of size this->dims, so we pick a random scale factor that // lets this random box be either as big as it can be while still fitting in // the image or as small as a 3x zoomed in box randomly somewhere in the image. double mins = 1.0/3.0, maxs = std::min(img.nr()/(double)dims.rows, img.nc()/(double)dims.cols); mins = std::min(mins, maxs); auto scale = rnd.get_double_in_range(mins, maxs); rectangle rect(scale*dims.cols, scale*dims.rows); // randomly shift the box around point offset(rnd.get_integer(1+img.nc()-rect.width()), rnd.get_integer(1+img.nr()-rect.height())); return move_rect(rect, offset); } }; // ---------------------------------------------------------------------------------------- inline std::ostream& operator<< ( std::ostream& out, const random_cropper& item ) { using std::endl; out << "random_cropper details: " << endl; out << " chip_dims.rows: " << item.get_chip_dims().rows << endl; out << " chip_dims.cols: " << item.get_chip_dims().cols << endl; out << " randomly_flip: " << std::boolalpha << item.get_randomly_flip() << endl; out << " max_rotation_degrees: " << item.get_max_rotation_degrees() << endl; out << " min_object_length_long_dim: " << item.get_min_object_length_long_dim() << endl; out << " min_object_length_short_dim: " << item.get_min_object_length_short_dim() << endl; out << " max_object_size: " << item.get_max_object_size() << endl; out << " background_crops_fraction: " << item.get_background_crops_fraction() << endl; out << " translate_amount: " << item.get_translate_amount() << endl; return out; } // ---------------------------------------------------------------------------------------- } #endif // DLIB_RaNDOM_CROPPER_H_