File size: 4,556 Bytes
9375c9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
// Copyright (C) 2012  Davis E. King ([email protected])
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_CHINESE_WHISPErS_Hh_
#define DLIB_CHINESE_WHISPErS_Hh_

#include "chinese_whispers_abstract.h"
#include <vector>
#include "../rand.h"
#include "../graph_utils/edge_list_graphs.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    inline unsigned long chinese_whispers (
        const std::vector<ordered_sample_pair>& edges,
        std::vector<unsigned long>& labels,
        const unsigned long num_iterations,
        dlib::rand& rnd
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(is_ordered_by_index(edges),
                    "\t unsigned long chinese_whispers()"
                    << "\n\t Invalid inputs were given to this function"
        );

        labels.clear();
        if (edges.size() == 0)
            return 0;

        std::vector<std::pair<unsigned long, unsigned long> > neighbors;
        find_neighbor_ranges(edges, neighbors);

        // Initialize the labels, each node gets a different label.
        labels.resize(neighbors.size());
        for (unsigned long i = 0; i < labels.size(); ++i)
            labels[i] = i;


        for (unsigned long iter = 0; iter < neighbors.size()*num_iterations; ++iter)
        {
            // Pick a random node.
            const unsigned long idx = rnd.get_random_64bit_number()%neighbors.size();

            // Count how many times each label happens amongst our neighbors.
            std::map<unsigned long, double> labels_to_counts;
            const unsigned long end = neighbors[idx].second;
            for (unsigned long i = neighbors[idx].first; i != end; ++i)
            {
                labels_to_counts[labels[edges[i].index2()]] += edges[i].distance();
            }

            // find the most common label
            std::map<unsigned long, double>::iterator i;
            double best_score = -std::numeric_limits<double>::infinity();
            unsigned long best_label = labels[idx];
            for (i = labels_to_counts.begin(); i != labels_to_counts.end(); ++i)
            {
                if (i->second > best_score)
                {
                    best_score = i->second;
                    best_label = i->first;
                }
            }

            labels[idx] = best_label;
        }


        // Remap the labels into a contiguous range.  First we find the
        // mapping.
        std::map<unsigned long,unsigned long> label_remap;
        for (unsigned long i = 0; i < labels.size(); ++i)
        {
            const unsigned long next_id = label_remap.size();
            if (label_remap.count(labels[i]) == 0)
                label_remap[labels[i]] = next_id;
        }
        // now apply the mapping to all the labels.
        for (unsigned long i = 0; i < labels.size(); ++i)
        {
            labels[i] = label_remap[labels[i]];
        }

        return label_remap.size();
    }

// ----------------------------------------------------------------------------------------

    inline unsigned long chinese_whispers (
        const std::vector<sample_pair>& edges,
        std::vector<unsigned long>& labels,
        const unsigned long num_iterations,
        dlib::rand& rnd
    )
    {
        std::vector<ordered_sample_pair> oedges;
        convert_unordered_to_ordered(edges, oedges);
        std::sort(oedges.begin(), oedges.end(), &order_by_index<ordered_sample_pair>);

        return chinese_whispers(oedges, labels, num_iterations, rnd);
    }

// ----------------------------------------------------------------------------------------

    inline unsigned long chinese_whispers (
        const std::vector<sample_pair>& edges,
        std::vector<unsigned long>& labels,
        const unsigned long num_iterations = 100
    )
    {
        dlib::rand rnd;
        return chinese_whispers(edges, labels, num_iterations, rnd);
    }

// ----------------------------------------------------------------------------------------

    inline unsigned long chinese_whispers (
        const std::vector<ordered_sample_pair>& edges,
        std::vector<unsigned long>& labels,
        const unsigned long num_iterations = 100
    )
    {
        dlib::rand rnd;
        return chinese_whispers(edges, labels, num_iterations, rnd);
    }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_CHINESE_WHISPErS_Hh_