// Copyright (C) 2015 Davis E. King ([email protected])
// License: Boost Software License See LICENSE.txt for the full license.
namespace dlib
{
// ----------------------------------------------------------------------------------------
template < | |
typename EXP | |
> | |
unsigned long bottom_up_cluster ( | |
const matrix_exp<EXP>& dists, | |
std::vector<unsigned long>& labels, | |
unsigned long min_num_clusters, | |
double max_dist = std::numeric_limits<double>::infinity() | |
); | |
/*! | |
requires | |
- dists.nr() == dists.nc() | |
- min_num_clusters > 0 | |
- dists == trans(dists) | |
(l.e. dists should be symmetric) | |
ensures | |
- Runs a bottom up agglomerative clustering algorithm. | |
- Interprets dists as a matrix that gives the distances between dists.nr() | |
items. In particular, we take dists(i,j) to be the distance between the ith | |
and jth element of some set. This function clusters the elements of this set | |
into at least min_num_clusters (or dists.nr() if there aren't enough | |
elements). Additionally, within each cluster, the maximum pairwise distance | |
between any two cluster elements is <= max_dist. | |
- returns the number of clusters found. | |
- #labels.size() == dists.nr() | |
- for all valid i: | |
- #labels[i] == the cluster ID of the node with index i (i.e. the node | |
corresponding to the distances dists(i,*)). | |
- 0 <= #labels[i] < the number of clusters found | |
(i.e. cluster IDs are assigned contiguously and start at 0) | |
!*/ | |
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
struct snl_range
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object represents an interval on the real number line. It is used
            to store the outputs of the segment_number_line() routine defined below.
    !*/

    snl_range(
    );
    /*!
        ensures
            - #lower == 0
            - #upper == 0
    !*/

    snl_range(
        double val
    );
    /*!
        ensures
            - #lower == val
            - #upper == val
    !*/

    snl_range(
        double l,
        double u
    );
    /*!
        requires
            - l <= u
        ensures
            - #lower == l
            - #upper == u
    !*/

    // End points of the interval.
    double lower;
    double upper;

    double width(
    ) const { return upper-lower; }
    /*!
        ensures
            - returns the width of this interval on the number line
              (i.e. upper-lower).
    !*/

    bool operator<(const snl_range& item) const { return lower < item.lower; }
    /*!
        ensures
            - provides a total ordering of snl_range objects assuming they are
              non-overlapping.
            - Only the lower end points are compared, which is why the ordering
              is only meaningful for non-overlapping ranges.
    !*/
};
std::ostream& operator<< (std::ostream& out, const snl_range& item );
/*!
    ensures
        - prints item to out in the form [lower,upper].
!*/
// ----------------------------------------------------------------------------------------
std::vector<snl_range> segment_number_line (
    const std::vector<double>& x,
    const double max_range_width
);
/*!
    requires
        - max_range_width >= 0
    ensures
        - Finds a clustering of the values in x and returns the ranges that define the
          clustering. This routine uses a combination of bottom up clustering and a
          simple greedy scan to try and find the most compact set of ranges that
          contain all the values in x.
        - This routine has approximately linear runtime.
        - Every value in x will be contained inside one of the returned snl_range
          objects.
        - All returned snl_range objects will have a width() <= max_range_width and
          will also be non-overlapping.
!*/
// ----------------------------------------------------------------------------------------
}