// Copyright (C) 2012 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_LOAD_IMAGE_DaTASET_Hh_ #define DLIB_LOAD_IMAGE_DaTASET_Hh_ #include "load_image_dataset_abstract.h" #include "../misc_api.h" #include "../dir_nav.h" #include "../image_io.h" #include "../array.h" #include #include "../geometry.h" #include "image_dataset_metadata.h" #include #include #include "../image_processing/full_object_detection.h" #include #include #include "../image_transforms/image_pyramid.h" namespace dlib { // ---------------------------------------------------------------------------------------- class image_dataset_file { public: image_dataset_file(const std::string& filename) { _skip_empty_images = false; _have_parts = false; _filename = filename; _box_area_thresh = std::numeric_limits::infinity(); } image_dataset_file boxes_match_label( const std::string& label ) const { image_dataset_file temp(*this); temp._labels.insert(label); return temp; } image_dataset_file skip_empty_images( ) const { image_dataset_file temp(*this); temp._skip_empty_images = true; return temp; } image_dataset_file boxes_have_parts( ) const { image_dataset_file temp(*this); temp._have_parts = true; return temp; } image_dataset_file shrink_big_images( double new_box_area_thresh = 150*150 ) const { image_dataset_file temp(*this); temp._box_area_thresh = new_box_area_thresh; return temp; } bool should_load_box ( const image_dataset_metadata::box& box ) const { if (_have_parts && box.parts.size() == 0) return false; if (_labels.size() == 0) return true; if (_labels.count(box.label) != 0) return true; return false; } const std::string& get_filename() const { return _filename; } bool should_skip_empty_images() const { return _skip_empty_images; } bool should_boxes_have_parts() const { return _have_parts; } double box_area_thresh() const { return _box_area_thresh; } const std::set& get_selected_box_labels() const { return _labels; } private: std::string _filename; std::set _labels; bool _skip_empty_images; bool _have_parts; double _box_area_thresh; }; // ---------------------------------------------------------------------------------------- template < typename array_type > std::vector > load_image_dataset ( array_type& images, std::vector >& object_locations, const image_dataset_file& source ) { images.clear(); object_locations.clear(); std::vector > ignored_rects; using namespace dlib::image_dataset_metadata; dataset data; load_image_dataset_metadata(data, source.get_filename()); // Set the current directory to be the one that contains the // metadata file. We do this because the file might contain // file paths which are relative to this folder. locally_change_current_dir chdir(get_parent_directory(file(source.get_filename()))); typedef typename array_type::value_type image_type; image_type img; std::vector rects, ignored; for (unsigned long i = 0; i < data.images.size(); ++i) { double min_rect_size = std::numeric_limits::infinity(); rects.clear(); ignored.clear(); for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) { if (source.should_load_box(data.images[i].boxes[j])) { if (data.images[i].boxes[j].ignore) { ignored.push_back(data.images[i].boxes[j].rect); } else { rects.push_back(data.images[i].boxes[j].rect); min_rect_size = std::min(min_rect_size, rects.back().area()); } } } if (!source.should_skip_empty_images() || rects.size() != 0) { load_image(img, data.images[i].filename); if (rects.size() != 0) { // if shrinking the image would still result in the smallest box being // bigger than the box area threshold then shrink the image. while(min_rect_size/2/2 > source.box_area_thresh()) { pyramid_down<2> pyr; pyr(img); min_rect_size *= (1.0/2.0)*(1.0/2.0); for (auto&& r : rects) r = pyr.rect_down(r); for (auto&& r : ignored) r = pyr.rect_down(r); } while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh()) { pyramid_down<3> pyr; pyr(img); min_rect_size *= (2.0/3.0)*(2.0/3.0); for (auto&& r : rects) r = pyr.rect_down(r); for (auto&& r : ignored) r = pyr.rect_down(r); } } images.push_back(img); object_locations.push_back(rects); ignored_rects.push_back(ignored); } } return ignored_rects; } // ---------------------------------------------------------------------------------------- namespace impl { inline size_t num_non_ignored_boxes (const std::vector& rects) { size_t cnt = 0; for (auto& b : rects) { if (!b.ignore) cnt++; } return cnt; } } template < typename array_type > void load_image_dataset ( array_type& images, std::vector >& object_locations, const image_dataset_file& source ) { images.clear(); object_locations.clear(); using namespace dlib::image_dataset_metadata; dataset data; load_image_dataset_metadata(data, source.get_filename()); // Set the current directory to be the one that contains the // metadata file. We do this because the file might contain // file paths which are relative to this folder. locally_change_current_dir chdir(get_parent_directory(file(source.get_filename()))); typedef typename array_type::value_type image_type; image_type img; std::vector rects; for (unsigned long i = 0; i < data.images.size(); ++i) { double min_rect_size = std::numeric_limits::infinity(); rects.clear(); for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) { if (source.should_load_box(data.images[i].boxes[j])) { if (data.images[i].boxes[j].ignore) { rects.push_back(ignored_mmod_rect(data.images[i].boxes[j].rect)); } else { rects.push_back(mmod_rect(data.images[i].boxes[j].rect)); min_rect_size = std::min(min_rect_size, rects.back().rect.area()); } rects.back().label = data.images[i].boxes[j].label; } } if (!source.should_skip_empty_images() || impl::num_non_ignored_boxes(rects) != 0) { load_image(img, data.images[i].filename); if (rects.size() != 0) { // if shrinking the image would still result in the smallest box being // bigger than the box area threshold then shrink the image. while(min_rect_size/2/2 > source.box_area_thresh()) { pyramid_down<2> pyr; pyr(img); min_rect_size *= (1.0/2.0)*(1.0/2.0); for (auto&& r : rects) r.rect = pyr.rect_down(r.rect); } while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh()) { pyramid_down<3> pyr; pyr(img); min_rect_size *= (2.0/3.0)*(2.0/3.0); for (auto&& r : rects) r.rect = pyr.rect_down(r.rect); } } images.push_back(std::move(img)); object_locations.push_back(std::move(rects)); } } } // ---------------------------------------------------------------------------------------- // ******* THIS FUNCTION IS DEPRECATED, you should use another version of load_image_dataset() ******* template < typename image_type, typename MM > std::vector > load_image_dataset ( array& images, std::vector >& object_locations, const std::string& filename, const std::string& label, bool skip_empty_images = false ) { image_dataset_file f(filename); if (label.size() != 0) f = f.boxes_match_label(label); if (skip_empty_images) f = f.skip_empty_images(); return load_image_dataset(images, object_locations, f); } // ---------------------------------------------------------------------------------------- template < typename array_type > std::vector > load_image_dataset ( array_type& images, std::vector >& object_locations, const std::string& filename ) { return load_image_dataset(images, object_locations, image_dataset_file(filename)); } // ---------------------------------------------------------------------------------------- template < typename array_type > void load_image_dataset ( array_type& images, std::vector>& object_locations, const std::string& filename ) { load_image_dataset(images, object_locations, image_dataset_file(filename)); } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < typename array_type > std::vector > load_image_dataset ( array_type& images, std::vector >& object_locations, const image_dataset_file& source, std::vector& parts_list ) { typedef typename array_type::value_type image_type; parts_list.clear(); images.clear(); object_locations.clear(); using namespace dlib::image_dataset_metadata; dataset data; load_image_dataset_metadata(data, source.get_filename()); // Set the current directory to be the one that contains the // metadata file. We do this because the file might contain // file paths which are relative to this folder. locally_change_current_dir chdir(get_parent_directory(file(source.get_filename()))); std::set all_parts; // find out what parts are being used in the dataset. Store results in all_parts. for (unsigned long i = 0; i < data.images.size(); ++i) { for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) { if (source.should_load_box(data.images[i].boxes[j])) { const std::map& parts = data.images[i].boxes[j].parts; std::map::const_iterator itr; for (itr = parts.begin(); itr != parts.end(); ++itr) { all_parts.insert(itr->first); } } } } // make a mapping between part names and the integers [0, all_parts.size()) std::map parts_idx; for (std::set::iterator i = all_parts.begin(); i != all_parts.end(); ++i) { parts_idx[*i] = parts_list.size(); parts_list.push_back(*i); } std::vector > ignored_rects; std::vector ignored; image_type img; std::vector object_dets; for (unsigned long i = 0; i < data.images.size(); ++i) { double min_rect_size = std::numeric_limits::infinity(); object_dets.clear(); ignored.clear(); for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) { if (source.should_load_box(data.images[i].boxes[j])) { if (data.images[i].boxes[j].ignore) { ignored.push_back(data.images[i].boxes[j].rect); } else { std::vector partlist(parts_idx.size(), OBJECT_PART_NOT_PRESENT); // populate partlist with all the parts present in this box. const std::map& parts = data.images[i].boxes[j].parts; std::map::const_iterator itr; for (itr = parts.begin(); itr != parts.end(); ++itr) { partlist[parts_idx[itr->first]] = itr->second; } object_dets.push_back(full_object_detection(data.images[i].boxes[j].rect, partlist)); min_rect_size = std::min(min_rect_size, object_dets.back().get_rect().area()); } } } if (!source.should_skip_empty_images() || object_dets.size() != 0) { load_image(img, data.images[i].filename); if (object_dets.size() != 0) { // if shrinking the image would still result in the smallest box being // bigger than the box area threshold then shrink the image. while(min_rect_size/2/2 > source.box_area_thresh()) { pyramid_down<2> pyr; pyr(img); min_rect_size *= (1.0/2.0)*(1.0/2.0); for (auto&& r : object_dets) { r.get_rect() = pyr.rect_down(r.get_rect()); for (unsigned long k = 0; k < r.num_parts(); ++k) r.part(k) = pyr.point_down(r.part(k)); } for (auto&& r : ignored) { r = pyr.rect_down(r); } } while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh()) { pyramid_down<3> pyr; pyr(img); min_rect_size *= (2.0/3.0)*(2.0/3.0); for (auto&& r : object_dets) { r.get_rect() = pyr.rect_down(r.get_rect()); for (unsigned long k = 0; k < r.num_parts(); ++k) r.part(k) = pyr.point_down(r.part(k)); } for (auto&& r : ignored) { r = pyr.rect_down(r); } } } images.push_back(img); object_locations.push_back(object_dets); ignored_rects.push_back(ignored); } } return ignored_rects; } // ---------------------------------------------------------------------------------------- template < typename array_type > std::vector > load_image_dataset ( array_type& images, std::vector >& object_locations, const image_dataset_file& source ) { std::vector parts_list; return load_image_dataset(images, object_locations, source, parts_list); } // ---------------------------------------------------------------------------------------- template < typename array_type > std::vector > load_image_dataset ( array_type& images, std::vector >& object_locations, const std::string& filename ) { std::vector parts_list; return load_image_dataset(images, object_locations, image_dataset_file(filename), parts_list); } // ---------------------------------------------------------------------------------------- } #endif // DLIB_LOAD_IMAGE_DaTASET_Hh_