|
|
|
|
|
#ifndef DLIB_LOAD_IMAGE_DaTASET_Hh_ |
|
#define DLIB_LOAD_IMAGE_DaTASET_Hh_ |
|
|
|
#include "load_image_dataset_abstract.h" |
|
#include "../misc_api.h" |
|
#include "../dir_nav.h" |
|
#include "../image_io.h" |
|
#include "../array.h" |
|
#include <vector> |
|
#include "../geometry.h" |
|
#include "image_dataset_metadata.h" |
|
#include <string> |
|
#include <set> |
|
#include "../image_processing/full_object_detection.h" |
|
#include <utility> |
|
#include <limits> |
|
#include "../image_transforms/image_pyramid.h" |
|
|
|
|
|
namespace dlib |
|
{ |
|
|
|
|
|
|
|
class image_dataset_file |
|
{ |
|
public: |
|
image_dataset_file(const std::string& filename) |
|
{ |
|
_skip_empty_images = false; |
|
_have_parts = false; |
|
_filename = filename; |
|
_box_area_thresh = std::numeric_limits<double>::infinity(); |
|
} |
|
|
|
image_dataset_file boxes_match_label( |
|
const std::string& label |
|
) const |
|
{ |
|
image_dataset_file temp(*this); |
|
temp._labels.insert(label); |
|
return temp; |
|
} |
|
|
|
image_dataset_file skip_empty_images( |
|
) const |
|
{ |
|
image_dataset_file temp(*this); |
|
temp._skip_empty_images = true; |
|
return temp; |
|
} |
|
|
|
image_dataset_file boxes_have_parts( |
|
) const |
|
{ |
|
image_dataset_file temp(*this); |
|
temp._have_parts = true; |
|
return temp; |
|
} |
|
|
|
image_dataset_file shrink_big_images( |
|
double new_box_area_thresh = 150*150 |
|
) const |
|
{ |
|
image_dataset_file temp(*this); |
|
temp._box_area_thresh = new_box_area_thresh; |
|
return temp; |
|
} |
|
|
|
bool should_load_box ( |
|
const image_dataset_metadata::box& box |
|
) const |
|
{ |
|
if (_have_parts && box.parts.size() == 0) |
|
return false; |
|
if (_labels.size() == 0) |
|
return true; |
|
if (_labels.count(box.label) != 0) |
|
return true; |
|
return false; |
|
} |
|
|
|
const std::string& get_filename() const { return _filename; } |
|
bool should_skip_empty_images() const { return _skip_empty_images; } |
|
bool should_boxes_have_parts() const { return _have_parts; } |
|
double box_area_thresh() const { return _box_area_thresh; } |
|
const std::set<std::string>& get_selected_box_labels() const { return _labels; } |
|
|
|
private: |
|
std::string _filename; |
|
std::set<std::string> _labels; |
|
bool _skip_empty_images; |
|
bool _have_parts; |
|
double _box_area_thresh; |
|
|
|
}; |
|
|
|
|
|
|
|
template < |
|
typename array_type |
|
> |
|
std::vector<std::vector<rectangle> > load_image_dataset ( |
|
array_type& images, |
|
std::vector<std::vector<rectangle> >& object_locations, |
|
const image_dataset_file& source |
|
) |
|
{ |
|
images.clear(); |
|
object_locations.clear(); |
|
|
|
std::vector<std::vector<rectangle> > ignored_rects; |
|
|
|
using namespace dlib::image_dataset_metadata; |
|
dataset data; |
|
load_image_dataset_metadata(data, source.get_filename()); |
|
|
|
|
|
|
|
|
|
locally_change_current_dir chdir(get_parent_directory(file(source.get_filename()))); |
|
|
|
|
|
typedef typename array_type::value_type image_type; |
|
|
|
|
|
image_type img; |
|
std::vector<rectangle> rects, ignored; |
|
for (unsigned long i = 0; i < data.images.size(); ++i) |
|
{ |
|
double min_rect_size = std::numeric_limits<double>::infinity(); |
|
rects.clear(); |
|
ignored.clear(); |
|
for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) |
|
{ |
|
if (source.should_load_box(data.images[i].boxes[j])) |
|
{ |
|
if (data.images[i].boxes[j].ignore) |
|
{ |
|
ignored.push_back(data.images[i].boxes[j].rect); |
|
} |
|
else |
|
{ |
|
rects.push_back(data.images[i].boxes[j].rect); |
|
min_rect_size = std::min<double>(min_rect_size, rects.back().area()); |
|
} |
|
} |
|
} |
|
|
|
if (!source.should_skip_empty_images() || rects.size() != 0) |
|
{ |
|
load_image(img, data.images[i].filename); |
|
if (rects.size() != 0) |
|
{ |
|
|
|
|
|
while(min_rect_size/2/2 > source.box_area_thresh()) |
|
{ |
|
pyramid_down<2> pyr; |
|
pyr(img); |
|
min_rect_size *= (1.0/2.0)*(1.0/2.0); |
|
for (auto&& r : rects) |
|
r = pyr.rect_down(r); |
|
for (auto&& r : ignored) |
|
r = pyr.rect_down(r); |
|
} |
|
while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh()) |
|
{ |
|
pyramid_down<3> pyr; |
|
pyr(img); |
|
min_rect_size *= (2.0/3.0)*(2.0/3.0); |
|
for (auto&& r : rects) |
|
r = pyr.rect_down(r); |
|
for (auto&& r : ignored) |
|
r = pyr.rect_down(r); |
|
} |
|
} |
|
images.push_back(img); |
|
object_locations.push_back(rects); |
|
ignored_rects.push_back(ignored); |
|
} |
|
} |
|
|
|
return ignored_rects; |
|
} |
|
|
|
|
|
|
|
namespace impl |
|
{ |
|
inline size_t num_non_ignored_boxes (const std::vector<mmod_rect>& rects) |
|
{ |
|
size_t cnt = 0; |
|
for (auto& b : rects) |
|
{ |
|
if (!b.ignore) |
|
cnt++; |
|
} |
|
return cnt; |
|
} |
|
} |
|
|
|
template < |
|
typename array_type |
|
> |
|
void load_image_dataset ( |
|
array_type& images, |
|
std::vector<std::vector<mmod_rect> >& object_locations, |
|
const image_dataset_file& source |
|
) |
|
{ |
|
images.clear(); |
|
object_locations.clear(); |
|
|
|
using namespace dlib::image_dataset_metadata; |
|
dataset data; |
|
load_image_dataset_metadata(data, source.get_filename()); |
|
|
|
|
|
|
|
|
|
locally_change_current_dir chdir(get_parent_directory(file(source.get_filename()))); |
|
|
|
typedef typename array_type::value_type image_type; |
|
|
|
image_type img; |
|
std::vector<mmod_rect> rects; |
|
for (unsigned long i = 0; i < data.images.size(); ++i) |
|
{ |
|
double min_rect_size = std::numeric_limits<double>::infinity(); |
|
rects.clear(); |
|
for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) |
|
{ |
|
if (source.should_load_box(data.images[i].boxes[j])) |
|
{ |
|
if (data.images[i].boxes[j].ignore) |
|
{ |
|
rects.push_back(ignored_mmod_rect(data.images[i].boxes[j].rect)); |
|
} |
|
else |
|
{ |
|
rects.push_back(mmod_rect(data.images[i].boxes[j].rect)); |
|
min_rect_size = std::min<double>(min_rect_size, rects.back().rect.area()); |
|
} |
|
rects.back().label = data.images[i].boxes[j].label; |
|
|
|
} |
|
} |
|
|
|
if (!source.should_skip_empty_images() || impl::num_non_ignored_boxes(rects) != 0) |
|
{ |
|
load_image(img, data.images[i].filename); |
|
if (rects.size() != 0) |
|
{ |
|
|
|
|
|
while(min_rect_size/2/2 > source.box_area_thresh()) |
|
{ |
|
pyramid_down<2> pyr; |
|
pyr(img); |
|
min_rect_size *= (1.0/2.0)*(1.0/2.0); |
|
for (auto&& r : rects) |
|
r.rect = pyr.rect_down(r.rect); |
|
} |
|
while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh()) |
|
{ |
|
pyramid_down<3> pyr; |
|
pyr(img); |
|
min_rect_size *= (2.0/3.0)*(2.0/3.0); |
|
for (auto&& r : rects) |
|
r.rect = pyr.rect_down(r.rect); |
|
} |
|
} |
|
images.push_back(std::move(img)); |
|
object_locations.push_back(std::move(rects)); |
|
} |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
template < |
|
typename image_type, |
|
typename MM |
|
> |
|
std::vector<std::vector<rectangle> > load_image_dataset ( |
|
array<image_type,MM>& images, |
|
std::vector<std::vector<rectangle> >& object_locations, |
|
const std::string& filename, |
|
const std::string& label, |
|
bool skip_empty_images = false |
|
) |
|
{ |
|
image_dataset_file f(filename); |
|
if (label.size() != 0) |
|
f = f.boxes_match_label(label); |
|
if (skip_empty_images) |
|
f = f.skip_empty_images(); |
|
return load_image_dataset(images, object_locations, f); |
|
} |
|
|
|
|
|
|
|
template < |
|
typename array_type |
|
> |
|
std::vector<std::vector<rectangle> > load_image_dataset ( |
|
array_type& images, |
|
std::vector<std::vector<rectangle> >& object_locations, |
|
const std::string& filename |
|
) |
|
{ |
|
return load_image_dataset(images, object_locations, image_dataset_file(filename)); |
|
} |
|
|
|
|
|
|
|
template < |
|
typename array_type |
|
> |
|
void load_image_dataset ( |
|
array_type& images, |
|
std::vector<std::vector<mmod_rect>>& object_locations, |
|
const std::string& filename |
|
) |
|
{ |
|
load_image_dataset(images, object_locations, image_dataset_file(filename)); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
template < |
|
typename array_type |
|
> |
|
std::vector<std::vector<rectangle> > load_image_dataset ( |
|
array_type& images, |
|
std::vector<std::vector<full_object_detection> >& object_locations, |
|
const image_dataset_file& source, |
|
std::vector<std::string>& parts_list |
|
) |
|
{ |
|
typedef typename array_type::value_type image_type; |
|
parts_list.clear(); |
|
images.clear(); |
|
object_locations.clear(); |
|
|
|
using namespace dlib::image_dataset_metadata; |
|
dataset data; |
|
load_image_dataset_metadata(data, source.get_filename()); |
|
|
|
|
|
|
|
|
|
locally_change_current_dir chdir(get_parent_directory(file(source.get_filename()))); |
|
|
|
|
|
std::set<std::string> all_parts; |
|
|
|
|
|
for (unsigned long i = 0; i < data.images.size(); ++i) |
|
{ |
|
for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) |
|
{ |
|
if (source.should_load_box(data.images[i].boxes[j])) |
|
{ |
|
const std::map<std::string,point>& parts = data.images[i].boxes[j].parts; |
|
std::map<std::string,point>::const_iterator itr; |
|
|
|
for (itr = parts.begin(); itr != parts.end(); ++itr) |
|
{ |
|
all_parts.insert(itr->first); |
|
} |
|
} |
|
} |
|
} |
|
|
|
|
|
std::map<std::string,int> parts_idx; |
|
for (std::set<std::string>::iterator i = all_parts.begin(); i != all_parts.end(); ++i) |
|
{ |
|
parts_idx[*i] = parts_list.size(); |
|
parts_list.push_back(*i); |
|
} |
|
|
|
std::vector<std::vector<rectangle> > ignored_rects; |
|
std::vector<rectangle> ignored; |
|
image_type img; |
|
std::vector<full_object_detection> object_dets; |
|
for (unsigned long i = 0; i < data.images.size(); ++i) |
|
{ |
|
double min_rect_size = std::numeric_limits<double>::infinity(); |
|
object_dets.clear(); |
|
ignored.clear(); |
|
for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) |
|
{ |
|
if (source.should_load_box(data.images[i].boxes[j])) |
|
{ |
|
if (data.images[i].boxes[j].ignore) |
|
{ |
|
ignored.push_back(data.images[i].boxes[j].rect); |
|
} |
|
else |
|
{ |
|
std::vector<point> partlist(parts_idx.size(), OBJECT_PART_NOT_PRESENT); |
|
|
|
|
|
const std::map<std::string,point>& parts = data.images[i].boxes[j].parts; |
|
std::map<std::string,point>::const_iterator itr; |
|
for (itr = parts.begin(); itr != parts.end(); ++itr) |
|
{ |
|
partlist[parts_idx[itr->first]] = itr->second; |
|
} |
|
|
|
object_dets.push_back(full_object_detection(data.images[i].boxes[j].rect, partlist)); |
|
min_rect_size = std::min<double>(min_rect_size, object_dets.back().get_rect().area()); |
|
} |
|
} |
|
} |
|
|
|
if (!source.should_skip_empty_images() || object_dets.size() != 0) |
|
{ |
|
load_image(img, data.images[i].filename); |
|
if (object_dets.size() != 0) |
|
{ |
|
|
|
|
|
while(min_rect_size/2/2 > source.box_area_thresh()) |
|
{ |
|
pyramid_down<2> pyr; |
|
pyr(img); |
|
min_rect_size *= (1.0/2.0)*(1.0/2.0); |
|
for (auto&& r : object_dets) |
|
{ |
|
r.get_rect() = pyr.rect_down(r.get_rect()); |
|
for (unsigned long k = 0; k < r.num_parts(); ++k) |
|
r.part(k) = pyr.point_down(r.part(k)); |
|
} |
|
for (auto&& r : ignored) |
|
{ |
|
r = pyr.rect_down(r); |
|
} |
|
} |
|
while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh()) |
|
{ |
|
pyramid_down<3> pyr; |
|
pyr(img); |
|
min_rect_size *= (2.0/3.0)*(2.0/3.0); |
|
for (auto&& r : object_dets) |
|
{ |
|
r.get_rect() = pyr.rect_down(r.get_rect()); |
|
for (unsigned long k = 0; k < r.num_parts(); ++k) |
|
r.part(k) = pyr.point_down(r.part(k)); |
|
} |
|
for (auto&& r : ignored) |
|
{ |
|
r = pyr.rect_down(r); |
|
} |
|
} |
|
} |
|
images.push_back(img); |
|
object_locations.push_back(object_dets); |
|
ignored_rects.push_back(ignored); |
|
} |
|
} |
|
|
|
|
|
return ignored_rects; |
|
} |
|
|
|
|
|
|
|
template < |
|
typename array_type |
|
> |
|
std::vector<std::vector<rectangle> > load_image_dataset ( |
|
array_type& images, |
|
std::vector<std::vector<full_object_detection> >& object_locations, |
|
const image_dataset_file& source |
|
) |
|
{ |
|
std::vector<std::string> parts_list; |
|
return load_image_dataset(images, object_locations, source, parts_list); |
|
} |
|
|
|
|
|
|
|
template < |
|
typename array_type |
|
> |
|
std::vector<std::vector<rectangle> > load_image_dataset ( |
|
array_type& images, |
|
std::vector<std::vector<full_object_detection> >& object_locations, |
|
const std::string& filename |
|
) |
|
{ |
|
std::vector<std::string> parts_list; |
|
return load_image_dataset(images, object_locations, image_dataset_file(filename), parts_list); |
|
} |
|
|
|
|
|
|
|
} |
|
|
|
#endif |
|
|
|
|