File size: 7,641 Bytes
9375c9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
<html><!-- Created using the cpp_pretty_printer from the dlib C++ library.  See http://dlib.net for updates. --><head><title>dlib C++ Library - chinese_whispers_abstract.h</title></head><body bgcolor='white'><pre>
<font color='#009900'>// Copyright (C) 2012  Davis E. King ([email protected])
</font><font color='#009900'>// License: Boost Software License   See LICENSE.txt for the full license.
</font><font color='#0000FF'>#undef</font> DLIB_CHINESE_WHISPErS_ABSTRACT_Hh_
<font color='#0000FF'>#ifdef</font> DLIB_CHINESE_WHISPErS_ABSTRACT_Hh_

<font color='#0000FF'>#include</font> <font color='#5555FF'>&lt;</font>vector<font color='#5555FF'>&gt;</font>
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='../rand.h.html'>../rand.h</a>"
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='../graph_utils/ordered_sample_pair_abstract.h.html'>../graph_utils/ordered_sample_pair_abstract.h</a>"
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='../graph_utils/sample_pair_abstract.h.html'>../graph_utils/sample_pair_abstract.h</a>"

<font color='#0000FF'>namespace</font> dlib
<b>{</b>

<font color='#009900'>// ----------------------------------------------------------------------------------------
</font>
    <font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> <b><a name='chinese_whispers'></a>chinese_whispers</b> <font face='Lucida Console'>(</font>
        <font color='#0000FF'>const</font> std::vector<font color='#5555FF'>&lt;</font>ordered_sample_pair<font color='#5555FF'>&gt;</font><font color='#5555FF'>&amp;</font> edges,
        std::vector<font color='#5555FF'>&lt;</font><font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font><font color='#5555FF'>&gt;</font><font color='#5555FF'>&amp;</font> labels,
        <font color='#0000FF'>const</font> <font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> num_iterations,
        dlib::rand<font color='#5555FF'>&amp;</font> rnd
    <font face='Lucida Console'>)</font>;
    <font color='#009900'>/*!
        requires
            - is_ordered_by_index(edges) == true
        ensures
            - This function implements the graph clustering algorithm described in the
              paper: Chinese Whispers - an Efficient Graph Clustering Algorithm and its
              Application to Natural Language Processing Problems by Chris Biemann.
            - Interprets edges as a directed graph.  That is, it contains the edges on the
              said graph and the ordered_sample_pair::distance() values define the edge
              weights (larger values indicating a stronger edge connection between the
              nodes).  If an edge has a distance() value of infinity then it is considered
              a "must link" edge.
            - returns the number of clusters found.
            - #labels.size() == max_index_plus_one(edges)
            - for all valid i:
                - #labels[i] == the cluster ID of the node with index i in the graph.  
                - 0 &lt;= #labels[i] &lt; the number of clusters found
                  (i.e. cluster IDs are assigned contiguously and start at 0) 
            - Duplicate edges are interpreted as if there had been just one edge with a
              distance value equal to the sum of all the duplicate edge's distance values.
            - The algorithm performs exactly num_iterations passes over the graph before
              terminating.
    !*/</font>

<font color='#009900'>// ----------------------------------------------------------------------------------------
</font>
    <font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> <b><a name='chinese_whispers'></a>chinese_whispers</b> <font face='Lucida Console'>(</font>
        <font color='#0000FF'>const</font> std::vector<font color='#5555FF'>&lt;</font>sample_pair<font color='#5555FF'>&gt;</font><font color='#5555FF'>&amp;</font> edges,
        std::vector<font color='#5555FF'>&lt;</font><font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font><font color='#5555FF'>&gt;</font><font color='#5555FF'>&amp;</font> labels,
        <font color='#0000FF'>const</font> <font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> num_iterations,
        dlib::rand<font color='#5555FF'>&amp;</font> rnd
    <font face='Lucida Console'>)</font>;
    <font color='#009900'>/*!
        ensures
            - This function is identical to the above chinese_whispers() routine except
              that it operates on a vector of sample_pair objects instead of
              ordered_sample_pairs.  Therefore, this is simply a convenience routine.  In
              particular, it is implemented by transforming the given edges into
              ordered_sample_pairs and then calling the chinese_whispers() routine defined
              above.  
    !*/</font>

<font color='#009900'>// ----------------------------------------------------------------------------------------
</font>
    <font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> <b><a name='chinese_whispers'></a>chinese_whispers</b> <font face='Lucida Console'>(</font>
        <font color='#0000FF'>const</font> std::vector<font color='#5555FF'>&lt;</font>ordered_sample_pair<font color='#5555FF'>&gt;</font><font color='#5555FF'>&amp;</font> edges,
        std::vector<font color='#5555FF'>&lt;</font><font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font><font color='#5555FF'>&gt;</font><font color='#5555FF'>&amp;</font> labels,
        <font color='#0000FF'>const</font> <font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> num_iterations <font color='#5555FF'>=</font> <font color='#979000'>100</font>
    <font face='Lucida Console'>)</font>;
    <font color='#009900'>/*!
        requires
            - is_ordered_by_index(edges) == true
        ensures
            - performs: return chinese_whispers(edges, labels, num_iterations, rnd)
              where rnd is a default initialized dlib::rand object.
    !*/</font>

<font color='#009900'>// ----------------------------------------------------------------------------------------
</font>
    <font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> <b><a name='chinese_whispers'></a>chinese_whispers</b> <font face='Lucida Console'>(</font>
        <font color='#0000FF'>const</font> std::vector<font color='#5555FF'>&lt;</font>sample_pair<font color='#5555FF'>&gt;</font><font color='#5555FF'>&amp;</font> edges,
        std::vector<font color='#5555FF'>&lt;</font><font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font><font color='#5555FF'>&gt;</font><font color='#5555FF'>&amp;</font> labels,
        <font color='#0000FF'>const</font> <font color='#0000FF'><u>unsigned</u></font> <font color='#0000FF'><u>long</u></font> num_iterations <font color='#5555FF'>=</font> <font color='#979000'>100</font>
    <font face='Lucida Console'>)</font>;
    <font color='#009900'>/*!
        ensures
            - performs: return chinese_whispers(edges, labels, num_iterations, rnd)
              where rnd is a default initialized dlib::rand object.
    !*/</font>

<font color='#009900'>// ----------------------------------------------------------------------------------------
</font>
<b>}</b>

<font color='#0000FF'>#endif</font> <font color='#009900'>// DLIB_CHINESE_WHISPErS_ABSTRACT_Hh_
</font>

</pre></body></html>