File size: 7,188 Bytes
9375c9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This example shows how to run a CNN based dog face detector using dlib.  The
    example loads a pretrained model and uses it to find dog faces in images.
    We also use the dlib::shape_predictor to find the location of the eyes and
    nose and then draw glasses and a mustache onto each dog found :)


    Users who are just learning about dlib's deep learning API should read the
    dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp examples to learn how
    the API works.  For an introduction to the object detection method you
    should read dnn_mmod_ex.cpp.


    Finally, users interested in how the dog face detector was trained should
    read the dnn_mmod_ex.cpp example program.  It should be noted that the
    dog face detector used in this example uses a bigger training dataset and
    larger CNN architecture than what is shown in dnn_mmod_ex.cpp, but
    otherwise training is the same.  If you compare the net_type statements
    in this file and dnn_mmod_ex.cpp you will see that they are very similar
    except that the number of parameters has been increased.

    Additionally, the following training parameters were different during
    training: The following lines in dnn_mmod_ex.cpp were changed from
        mmod_options options(face_boxes_train, 40,40);
    to the following when training the model used in this example:
        mmod_options options(face_boxes_train, 80,80);

    Also, the random_cropper was left at its default settings, so we didn't
    call these functions:
        cropper.set_chip_dims(200, 200);

    The training data used to create the model is also available at
    http://dlib.net/files/data/CU_dogs_fully_labeled.tar.gz

    Lastly, the shape_predictor was trained with default settings except we
    used the following non-default settings: cascade depth=20, tree
    depth=5, padding=0.2
*/
#include <iostream>
#include <dlib/dnn.h>
#include <dlib/data_io.h>
#include <dlib/image_processing.h>
#include <dlib/gui_widgets.h>
using namespace std;
using namespace dlib;
// ----------------------------------------------------------------------------------------
// Building blocks of the detector network.  Per dlib's con<> layer, the template
// arguments are: filter count, filter rows, filter cols, stride_y, stride_x.

// 5x5 convolution with a stride of 2 in each direction.
template <long filter_count, typename SUBNET>
using con5d = con<filter_count, 5, 5, 2, 2, SUBNET>;

// 5x5 convolution with a stride of 1 in each direction.
template <long filter_count, typename SUBNET>
using con5 = con<filter_count, 5, 5, 1, 1, SUBNET>;

// Three stacked con5d -> affine -> relu stages with 16, 32 and 32 filters
// (listed from the input side outwards).
template <typename SUBNET>
using downsampler =
    relu<affine<con5d<32,
    relu<affine<con5d<32,
    relu<affine<con5d<16, SUBNET>>>>>>>>>;

// A single con5 -> affine -> relu stage with 45 filters.
template <typename SUBNET>
using rcon5 = relu<affine<con5<45, SUBNET>>>;

// The complete detector: an image pyramid input, the downsampler, three rcon5
// stages, and a final 9x9 single-filter convolution feeding the MMOD loss.
using net_type =
    loss_mmod<con<1, 9, 9, 1, 1,
    rcon5<rcon5<rcon5<
    downsampler<
    input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
// ----------------------------------------------------------------------------------------
int main(int argc, char** argv) try
if (argc < 3)
cout << "Call this program like this:" << endl;
cout << "./dnn_mmod_dog_hipsterizer mmod_dog_hipsterizer.dat faces/dogs.jpg" << endl;
cout << "\nYou can get the mmod_dog_hipsterizer.dat file from:\n";
cout << "" << endl;
return 0;
// load the models as well as glasses and mustache.
net_type net;
shape_predictor sp;
matrix<rgb_alpha_pixel> glasses, mustache;
deserialize(argv[1]) >> net >> sp >> glasses >> mustache;
image_window win1(glasses);
image_window win2(mustache);
image_window win_wireframe, win_hipster;
// Now process each image, find dogs, and hipsterize them by drawing glasses and a
// mustache on each dog :)
for (int i = 2; i < argc; ++i)
matrix<rgb_pixel> img;
load_image(img, argv[i]);
// Upsampling the image will allow us to find smaller dog faces but will use more
// computational resources.
auto dets = net(img);
// We will also draw a wireframe on each dog's face so you can see where the
// shape_predictor is identifying face landmarks.
std::vector<image_window::overlay_line> lines;
for (auto&& d : dets)
// get the landmarks for this dog's face
auto shape = sp(img, d.rect);
const rgb_pixel color(0,255,0);
auto top = shape.part(0);
auto lear = shape.part(1);
auto leye = shape.part(2);
auto nose = shape.part(3);
auto rear = shape.part(4);
auto reye = shape.part(5);
// The locations of the left and right ends of the mustache.
auto lmustache = 1.3*(leye-reye)/2 + nose;
auto rmustache = 1.3*(reye-leye)/2 + nose;
// Draw the glasses onto the image.
std::vector<point> from = {2*point(176,36), 2*point(59,35)}, to = {leye, reye};
auto tform = find_similarity_transform(from, to);
for (long r = 0; r <; ++r)
for (long c = 0; c <; ++c)
point p = tform(point(c,r));
if (get_rect(img).contains(p))
assign_pixel(img(p.y(),p.x()), glasses(r,c));
// Draw the mustache onto the image right under the dog's nose.
auto mrect = get_rect(mustache);
from = {mrect.tl_corner(), mrect.tr_corner()};
to = {rmustache, lmustache};
tform = find_similarity_transform(from, to);
for (long r = 0; r <; ++r)
for (long c = 0; c <; ++c)
point p = tform(point(c,r));
if (get_rect(img).contains(p))
assign_pixel(img(p.y(),p.x()), mustache(r,c));
// Record the lines needed for the face wire frame.
lines.push_back(image_window::overlay_line(leye, nose, color));
lines.push_back(image_window::overlay_line(nose, reye, color));
lines.push_back(image_window::overlay_line(reye, leye, color));
lines.push_back(image_window::overlay_line(reye, rear, color));
lines.push_back(image_window::overlay_line(rear, top, color));
lines.push_back(image_window::overlay_line(top, lear, color));
lines.push_back(image_window::overlay_line(lear, leye, color));
cout << "Hit enter to process the next image." << endl;
catch(std::exception& e)
cout << e.what() << endl;