File size: 21,162 Bytes
9375c9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 |
<html><!-- Created using the cpp_pretty_printer from the dlib C++ library. See http://dlib.net for updates. --><head><title>dlib C++ Library - hog_abstract.h</title></head><body bgcolor='white'><pre>
<font color='#009900'>// Copyright (C) 2010 Davis E. King ([email protected])
</font><font color='#009900'>// License: Boost Software License See LICENSE.txt for the full license.
</font><font color='#0000FF'>#undef</font> DLIB_HoG_ABSTRACT_Hh_
<font color='#0000FF'>#ifdef</font> DLIB_HoG_ABSTRACT_Hh_
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='../algs.h.html'>../algs.h</a>"
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='../matrix.h.html'>../matrix.h</a>"
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='../array2d.h.html'>../array2d.h</a>"
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='../geometry.h.html'>../geometry.h</a>"
<font color='#0000FF'>#include</font> <font color='#5555FF'><</font>cmath<font color='#5555FF'>></font>
<font color='#0000FF'>namespace</font> dlib
<b>{</b>
<font color='#0000FF'>enum</font>
<b>{</b>
hog_no_interpolation,
hog_angle_interpolation,
hog_full_interpolation,
hog_signed_gradient,
hog_unsigned_gradient
<b>}</b>;
<font color='#0000FF'>template</font> <font color='#5555FF'><</font>
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> cell_size_,
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> block_size_,
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> cell_stride_,
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> num_orientation_bins_,
<font color='#0000FF'><u>int</u></font> gradient_type_,
<font color='#0000FF'><u>int</u></font> interpolation_type_
<font color='#5555FF'>></font>
<font color='#0000FF'>class</font> <b><a name='hog_image'></a>hog_image</b> : noncopyable
<b>{</b>
<font color='#009900'>/*!
REQUIREMENTS ON TEMPLATE PARAMETERS
- cell_size_ > 1
- block_size_ > 0
- cell_stride_ > 0
- num_orientation_bins_ > 0
- gradient_type_ == hog_signed_gradient or hog_unsigned_gradient
- interpolation_type_ == hog_no_interpolation, hog_angle_interpolation, or
hog_full_interpolation
INITIAL VALUE
- size() == 0
WHAT THIS OBJECT REPRESENTS
This object is a tool for performing the image feature extraction algorithm
described in the following paper:
Histograms of Oriented Gradients for Human Detection
by Navneet Dalal and Bill Triggs
To summarize the technique, this object tiles non-overlapping cells over an
image. Each of these cells is a box that is cell_size by cell_size pixels
in size. Each cell contains an array of size num_orientation_bins. The array
in a cell is used to store a histogram of all the edge orientations contained
within the cell's image region.
Once the grid of cells and their histograms has been computed (via load())
you can obtain descriptors for each "block" in the image. A block is just a
group of cells and blocks are allowed to overlap. Each block is square and
made up of block_size*block_size cells. So when you call operator()(r,c)
what you obtain is a vector that is just a bunch of cell histograms that
have been concatenated (and length normalized).
The template arguments control the various parameters of this algorithm.
The interpolation_type parameter controls the amount of interpolation
that happens during the creation of the edge orientation histograms. It
varies from no interpolation at all to full spatial and angle interpolation.
Angle interpolation means that an edge doesn't just go into its nearest
histogram bin but instead gets interpolated into its two nearest neighbors.
Similarly, spatial interpolation means that an edge doesn't just go into
the cell it is in but it also contributes to nearby cells depending on how
close they are.
The gradient_type parameter controls how edge orientations are measured.
Consider the following ASCII art:
signed gradients:           unsigned gradients:
      /\                             |
      ||                             |
<---      ---->                ------+------
      ||                             |
      \/                             |
An image is full of gradients caused by edges between objects. The direction
of a gradient is determined by which end of it has pixels of highest intensity.
So for example, suppose you had a picture containing black and white stripes.
Then the magnitude of the gradient at each point in the image tells you if you
are on the edge of a stripe and the gradient's orientation tells you which
direction you have to move to get into the white stripe.
Signed gradients preserve this direction information while unsigned gradients
do not. An unsigned gradient will only tell you the orientation of the stripe
but not which direction leads to the white stripe.
Finally, the cell_stride parameter controls how much overlap you get between
blocks. The maximum amount of overlap is obtained when cell_stride == 1.
At the other extreme, you would have no overlap if cell_stride == block_size.
THREAD SAFETY
Concurrent access to an instance of this object is not safe and should be protected
by a mutex lock except for the case where you are copying the configuration
(via copy_configuration()) of a hog_image object to many other threads.
In this case, it is safe to copy the configuration of a shared object so long
as no other operations are performed on it.
!*/</font>
<font color='#0000FF'>public</font>:
<font color='#0000FF'>const</font> <font color='#0000FF'>static</font> <font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> cell_size <font color='#5555FF'>=</font> cell_size_;
<font color='#0000FF'>const</font> <font color='#0000FF'>static</font> <font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> block_size <font color='#5555FF'>=</font> block_size_;
<font color='#0000FF'>const</font> <font color='#0000FF'>static</font> <font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> cell_stride <font color='#5555FF'>=</font> cell_stride_;
<font color='#0000FF'>const</font> <font color='#0000FF'>static</font> <font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> num_orientation_bins <font color='#5555FF'>=</font> num_orientation_bins_;
<font color='#0000FF'>const</font> <font color='#0000FF'>static</font> <font color='#0000FF'><u>int</u></font> gradient_type <font color='#5555FF'>=</font> gradient_type_;
<font color='#0000FF'>const</font> <font color='#0000FF'>static</font> <font color='#0000FF'><u>int</u></font> interpolation_type <font color='#5555FF'>=</font> interpolation_type_;
<font color='#0000FF'>const</font> <font color='#0000FF'>static</font> <font color='#0000FF'><u>long</u></font> min_size <font color='#5555FF'>=</font> cell_size<font color='#5555FF'>*</font>block_size<font color='#5555FF'>+</font><font color='#979000'>2</font>;
<font color='#0000FF'>typedef</font> matrix<font color='#5555FF'><</font><font color='#0000FF'><u>double</u></font>, block_size<font color='#5555FF'>*</font>block_size<font color='#5555FF'>*</font>num_orientation_bins, <font color='#979000'>1</font><font color='#5555FF'>></font> descriptor_type;
<b><a name='hog_image'></a>hog_image</b> <font face='Lucida Console'>(</font>
<font face='Lucida Console'>)</font>;
<font color='#009900'>/*!
ensures
- this object is properly initialized
!*/</font>
<font color='#0000FF'><u>void</u></font> <b><a name='clear'></a>clear</b> <font face='Lucida Console'>(</font>
<font face='Lucida Console'>)</font>;
<font color='#009900'>/*!
ensures
- this object will have its initial value
!*/</font>
<font color='#0000FF'><u>void</u></font> <b><a name='copy_configuration'></a>copy_configuration</b> <font face='Lucida Console'>(</font>
<font color='#0000FF'>const</font> hog_image<font color='#5555FF'>&</font> item
<font face='Lucida Console'>)</font>;
<font color='#009900'>/*!
ensures
- copies all the state information of item into *this, except for state
information populated by load(). More precisely, given two hog_image
objects H1 and H2, the following sequence of instructions should always
result in both of them having the exact same state.
H2.copy_configuration(H1);
H1.load(img);
H2.load(img);
!*/</font>
<font color='#0000FF'>template</font> <font color='#5555FF'><</font>
<font color='#0000FF'>typename</font> image_type
<font color='#5555FF'>></font>
<font color='#0000FF'>inline</font> <font color='#0000FF'><u>void</u></font> <b><a name='load'></a>load</b> <font face='Lucida Console'>(</font>
<font color='#0000FF'>const</font> image_type<font color='#5555FF'>&</font> img
<font face='Lucida Console'>)</font>;
<font color='#009900'>/*!
requires
- image_type is a dlib::matrix or something convertible to a matrix
via mat().
- pixel_traits&lt;typename image_traits&lt;image_type&gt;::pixel_type&gt;::has_alpha == false
ensures
- if (img.nr() < min_size || img.nc() < min_size) then
- the image is too small so we don't compute anything on it
- #size() == 0
- else
- generates a HOG image from the given image.
- #size() > 0
!*/</font>
<font color='#0000FF'>inline</font> <font color='#0000FF'><u>void</u></font> <b><a name='unload'></a>unload</b> <font face='Lucida Console'>(</font>
<font face='Lucida Console'>)</font>;
<font color='#009900'>/*!
ensures
- #nr() == 0
- #nc() == 0
- clears only the state information which is populated by load(). For
example, let H be a hog_image object. Then consider the two sequences
of instructions:
Sequence 1:
H.load(img);
H.unload();
H.load(img);
Sequence 2:
H.load(img);
Both sequence 1 and sequence 2 should have the same effect on H.
!*/</font>
<font color='#0000FF'>inline</font> <font color='#0000FF'><u>size_t</u></font> <b><a name='size'></a>size</b> <font face='Lucida Console'>(</font>
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>;
<font color='#009900'>/*!
ensures
- returns nr()*nc()
!*/</font>
<font color='#0000FF'>inline</font> <font color='#0000FF'><u>long</u></font> <b><a name='nr'></a>nr</b> <font face='Lucida Console'>(</font>
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>;
<font color='#009900'>/*!
ensures
- returns the number of rows in this HOG image
!*/</font>
<font color='#0000FF'>inline</font> <font color='#0000FF'><u>long</u></font> <b><a name='nc'></a>nc</b> <font face='Lucida Console'>(</font>
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>;
<font color='#009900'>/*!
ensures
- returns the number of columns in this HOG image
!*/</font>
<font color='#0000FF'><u>long</u></font> <b><a name='get_num_dimensions'></a>get_num_dimensions</b> <font face='Lucida Console'>(</font>
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>;
<font color='#009900'>/*!
ensures
- returns the number of dimensions in the feature vectors generated by
this object.
- In particular, returns the value block_size*block_size*num_orientation_bins
!*/</font>
<font color='#0000FF'>inline</font> <font color='#0000FF'>const</font> descriptor_type<font color='#5555FF'>&</font> <b><a name='operator'></a>operator</b><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font face='Lucida Console'>(</font>
<font color='#0000FF'><u>long</u></font> row,
<font color='#0000FF'><u>long</u></font> col
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>;
<font color='#009900'>/*!
requires
- 0 <= row < nr()
- 0 <= col < nc()
ensures
- returns the descriptor for the HOG block at the given row and column. This descriptor
will include information from a window that is located at get_block_rect(row,col) in
the original image given to load().
- The returned descriptor vector will have get_num_dimensions() elements.
!*/</font>
<font color='#0000FF'>const</font> rectangle <b><a name='get_block_rect'></a>get_block_rect</b> <font face='Lucida Console'>(</font>
<font color='#0000FF'><u>long</u></font> row,
<font color='#0000FF'><u>long</u></font> col
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>;
<font color='#009900'>/*!
ensures
- returns a rectangle that tells you what part of the original image is associated
with a particular HOG block. That is, what part of the input image is associated
with (*this)(row,col).
- The returned rectangle will be cell_size*block_size pixels wide and tall.
!*/</font>
<font color='#0000FF'>const</font> point <b><a name='image_to_feat_space'></a>image_to_feat_space</b> <font face='Lucida Console'>(</font>
<font color='#0000FF'>const</font> point<font color='#5555FF'>&</font> p
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>;
<font color='#009900'>/*!
ensures
- Each local feature is extracted from a certain point in the input image.
This function returns the identity of the local feature corresponding
to the image location p. Or in other words, let P == image_to_feat_space(p),
then (*this)(P.y(),P.x()) == the local feature closest to, or centered at,
the point p in the input image. Note that some image points might not have
corresponding feature locations. E.g. border points or points outside the
image. In these cases the returned point will be outside get_rect(*this).
!*/</font>
<font color='#0000FF'>const</font> rectangle <b><a name='image_to_feat_space'></a>image_to_feat_space</b> <font face='Lucida Console'>(</font>
<font color='#0000FF'>const</font> rectangle<font color='#5555FF'>&</font> rect
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>;
<font color='#009900'>/*!
ensures
- returns rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner()));
(i.e. maps a rectangle from image space to feature space)
!*/</font>
<font color='#0000FF'>const</font> point <b><a name='feat_to_image_space'></a>feat_to_image_space</b> <font face='Lucida Console'>(</font>
<font color='#0000FF'>const</font> point<font color='#5555FF'>&</font> p
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>;
<font color='#009900'>/*!
ensures
- returns the location in the input image space corresponding to the center
of the local feature at point p. In other words, this function computes
the inverse of image_to_feat_space(). Note that it may only do so approximately,
since more than one image location might correspond to the same local feature.
That is, image_to_feat_space() might not be invertible so this function gives
the closest possible result.
!*/</font>
<font color='#0000FF'>const</font> rectangle <b><a name='feat_to_image_space'></a>feat_to_image_space</b> <font face='Lucida Console'>(</font>
<font color='#0000FF'>const</font> rectangle<font color='#5555FF'>&</font> rect
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>;
<font color='#009900'>/*!
ensures
- returns rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner()));
(i.e. maps a rectangle from feature space to image space)
!*/</font>
<b>}</b>;
<font color='#009900'>// ----------------------------------------------------------------------------------------
</font>
<font color='#0000FF'>template</font> <font color='#5555FF'><</font>
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> T1,
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> T2,
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> T3,
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> T4,
<font color='#0000FF'><u>int</u></font> T5,
<font color='#0000FF'><u>int</u></font> T6
<font color='#5555FF'>></font>
<font color='#0000FF'><u>void</u></font> <b><a name='serialize'></a>serialize</b> <font face='Lucida Console'>(</font>
<font color='#0000FF'>const</font> hog_image<font color='#5555FF'><</font>T1,T2,T3,T4,T5,T6<font color='#5555FF'>></font><font color='#5555FF'>&</font> item,
std::ostream<font color='#5555FF'>&</font> out
<font face='Lucida Console'>)</font>;
<font color='#009900'>/*!
provides serialization support
!*/</font>
<font color='#0000FF'>template</font> <font color='#5555FF'><</font>
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> T1,
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> T2,
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> T3,
<font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> T4,
<font color='#0000FF'><u>int</u></font> T5,
<font color='#0000FF'><u>int</u></font> T6
<font color='#5555FF'>></font>
<font color='#0000FF'><u>void</u></font> <b><a name='deserialize'></a>deserialize</b> <font face='Lucida Console'>(</font>
hog_image<font color='#5555FF'><</font>T1,T2,T3,T4,T5,T6<font color='#5555FF'>></font><font color='#5555FF'>&</font> item,
std::istream<font color='#5555FF'>&</font> in
<font face='Lucida Console'>)</font>;
<font color='#009900'>/*!
provides deserialization support
!*/</font>
<font color='#009900'>// ----------------------------------------------------------------------------------------
</font>
<b>}</b>
<font color='#0000FF'>#endif</font> <font color='#009900'>// DLIB_HoG_ABSTRACT_Hh_
</font>
</pre></body></html> |