|
|
|
|
|
#ifndef DLIB_BOTTOM_uP_CLUSTER_Hh_ |
|
#define DLIB_BOTTOM_uP_CLUSTER_Hh_ |
|
|
|
#include <queue> |
|
#include <map> |
|
|
|
#include "bottom_up_cluster_abstract.h" |
|
#include "../algs.h" |
|
#include "../matrix.h" |
|
#include "../disjoint_subsets.h" |
|
#include "../graph_utils.h" |
|
|
|
|
|
namespace dlib |
|
{ |
|
|
|
|
|
|
|
namespace buc_impl |
|
{ |
|
// Folds cluster `src` into cluster `dest` inside the pairwise distance table.
//
// For every index i, the distance between i and dest becomes the larger of
// the current (i,dest) and (i,src) entries.  The value is written to both
// (i,dest) and (dest,i).  Entries in the src row/column are left untouched.
inline void merge_sets (
    matrix<double>& dists,
    unsigned long dest,
    unsigned long src
)
{
    const long num_rows = dists.nr();
    for (long i = 0; i < num_rows; ++i)
    {
        const double farthest = std::max(dists(i,dest), dists(i,src));
        dists(i,dest) = farthest;
        dists(dest,i) = farthest;
    }
}
|
|
|
struct compare_dist |
|
{ |
|
bool operator() ( |
|
const sample_pair& a, |
|
const sample_pair& b |
|
) const |
|
{ |
|
return a.distance() > b.distance(); |
|
} |
|
}; |
|
} |
|
|
|
|
|
|
|
template < |
|
typename EXP |
|
> |
|
unsigned long bottom_up_cluster ( |
|
const matrix_exp<EXP>& dists_, |
|
std::vector<unsigned long>& labels, |
|
unsigned long min_num_clusters, |
|
double max_dist = std::numeric_limits<double>::infinity() |
|
) |
|
{ |
|
matrix<double> dists = matrix_cast<double>(dists_); |
|
|
|
DLIB_CASSERT(dists.nr() == dists.nc() && min_num_clusters > 0, |
|
"\t unsigned long bottom_up_cluster()" |
|
<< "\n\t Invalid inputs were given to this function." |
|
<< "\n\t dists.nr(): " << dists.nr() |
|
<< "\n\t dists.nc(): " << dists.nc() |
|
<< "\n\t min_num_clusters: " << min_num_clusters |
|
); |
|
|
|
using namespace buc_impl; |
|
|
|
labels.resize(dists.nr()); |
|
disjoint_subsets sets; |
|
sets.set_size(dists.nr()); |
|
if (labels.size() == 0) |
|
return 0; |
|
|
|
|
|
|
|
std::priority_queue<sample_pair, std::vector<sample_pair>, compare_dist> que; |
|
for (long r = 0; r < dists.nr(); ++r) |
|
for (long c = r+1; c < dists.nc(); ++c) |
|
que.push(sample_pair(r,c,dists(r,c))); |
|
|
|
|
|
for (unsigned long iter = min_num_clusters; iter < sets.size(); ++iter) |
|
{ |
|
|
|
double best_dist = que.top().distance(); |
|
unsigned long a = sets.find_set(que.top().index1()); |
|
unsigned long b = sets.find_set(que.top().index2()); |
|
que.pop(); |
|
|
|
|
|
while(a == b || best_dist < dists(a,b)) |
|
{ |
|
|
|
|
|
if (a != b) |
|
que.push(sample_pair(a, b, dists(a, b))); |
|
|
|
best_dist = que.top().distance(); |
|
a = sets.find_set(que.top().index1()); |
|
b = sets.find_set(que.top().index2()); |
|
que.pop(); |
|
} |
|
|
|
|
|
|
|
if (best_dist > max_dist) |
|
break; |
|
unsigned long news = sets.merge_sets(a,b); |
|
unsigned long olds = (news==a)?b:a; |
|
merge_sets(dists, news, olds); |
|
} |
|
|
|
|
|
|
|
std::map<unsigned long, unsigned long> relabel; |
|
for (unsigned long r = 0; r < labels.size(); ++r) |
|
{ |
|
unsigned long l = sets.find_set(r); |
|
|
|
if (relabel.count(l) == 0) |
|
{ |
|
unsigned long next = relabel.size(); |
|
relabel[l] = next; |
|
} |
|
labels[r] = relabel[l]; |
|
} |
|
|
|
|
|
return relabel.size(); |
|
} |
|
|
|
|
|
|
|
|
|
struct snl_range |
|
{ |
|
snl_range() = default; |
|
snl_range(double val) : lower(val), upper(val) {} |
|
snl_range(double l, double u) : lower(l), upper(u) { DLIB_ASSERT(lower <= upper)} |
|
|
|
double lower = 0; |
|
double upper = 0; |
|
|
|
double width() const { return upper-lower; } |
|
bool operator<(const snl_range& item) const { return lower < item.lower; } |
|
}; |
|
|
|
// Returns the smallest range that contains both a and b: it runs from the
// smaller of the two lower bounds to the larger of the two upper bounds.
inline snl_range merge(const snl_range& a, const snl_range& b)
{
    const double lowest  = std::min(a.lower, b.lower);
    const double highest = std::max(a.upper, b.upper);
    return snl_range(lowest, highest);
}
|
|
|
inline double distance (const snl_range& a, const snl_range& b) |
|
{ |
|
return std::max(a.lower,b.lower) - std::min(a.upper,b.upper); |
|
} |
|
|
|
inline std::ostream& operator<< (std::ostream& out, const snl_range& item ) |
|
{ |
|
out << "["<<item.lower<<","<<item.upper<<"]"; |
|
return out; |
|
} |
|
|
|
|
|
|
|
inline std::vector<snl_range> segment_number_line ( |
|
const std::vector<double>& x, |
|
const double max_range_width |
|
) |
|
{ |
|
DLIB_CASSERT(max_range_width >= 0); |
|
|
|
|
|
|
|
std::vector<snl_range> ranges; |
|
for (auto v : x) |
|
ranges.push_back(v); |
|
std::sort(ranges.begin(), ranges.end()); |
|
|
|
std::vector<snl_range> greedy_final_ranges; |
|
if (ranges.size() == 0) |
|
return greedy_final_ranges; |
|
|
|
|
|
|
|
|
|
greedy_final_ranges.push_back(ranges[0]); |
|
for (size_t i = 1; i < ranges.size(); ++i) |
|
{ |
|
auto m = merge(greedy_final_ranges.back(), ranges[i]); |
|
if (m.width() <= max_range_width) |
|
greedy_final_ranges.back() = m; |
|
else |
|
greedy_final_ranges.push_back(ranges[i]); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
std::vector<sample_pair> edges; |
|
for (size_t i = 1; i < ranges.size(); ++i) |
|
edges.push_back(sample_pair(i-1,i, distance(ranges[i-1],ranges[i]))); |
|
std::sort(edges.begin(), edges.end(), order_by_distance<sample_pair>); |
|
|
|
disjoint_subsets sets; |
|
sets.set_size(ranges.size()); |
|
|
|
|
|
for (auto edge : edges) |
|
{ |
|
|
|
unsigned long a = sets.find_set(edge.index1()); |
|
unsigned long b = sets.find_set(edge.index2()); |
|
|
|
|
|
auto m = merge(ranges[a], ranges[b]); |
|
if (m.width() <= max_range_width) |
|
{ |
|
unsigned long news = sets.merge_sets(a,b); |
|
ranges[news] = m; |
|
} |
|
} |
|
|
|
|
|
|
|
std::vector<snl_range> final_ranges; |
|
std::vector<bool> already_output(ranges.size(), false); |
|
for (unsigned long i = 0; i < sets.size(); ++i) |
|
{ |
|
auto s = sets.find_set(i); |
|
if (!already_output[s]) |
|
{ |
|
final_ranges.push_back(ranges[s]); |
|
already_output[s] = true; |
|
} |
|
} |
|
|
|
|
|
|
|
if (final_ranges.size() <= greedy_final_ranges.size()) |
|
return final_ranges; |
|
else |
|
return greedy_final_ranges; |
|
} |
|
|
|
|
|
|
|
} |
|
|
|
#endif |
|
|
|
|