"""Function for detecting communities based on Louvain Community Detection Algorithm""" from collections import defaultdict, deque import networkx as nx from networkx.algorithms.community import modularity from networkx.utils import py_random_state __all__ = ["louvain_communities", "louvain_partitions"] @py_random_state("seed") @nx._dispatch(edge_attrs="weight") def louvain_communities( G, weight="weight", resolution=1, threshold=0.0000001, seed=None ): r"""Find the best partition of a graph using the Louvain Community Detection Algorithm. Louvain Community Detection Algorithm is a simple method to extract the community structure of a network. This is a heuristic method based on modularity optimization. [1]_ The algorithm works in 2 steps. On the first step it assigns every node to be in its own community and then for each node it tries to find the maximum positive modularity gain by moving each node to all of its neighbor communities. If no positive gain is achieved the node remains in its original community. The modularity gain obtained by moving an isolated node $i$ into a community $C$ can easily be calculated by the following formula (combining [1]_ [2]_ and some algebra): .. math:: \Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2} where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$, $\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$ is the resolution parameter. For the directed case the modularity gain can be computed using this formula according to [3]_ .. math:: \Delta Q = \frac{k_{i,in}}{m} - \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2} where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and $\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident to nodes in $C$. The first phase continues until no individual move can improve the modularity. The second phase consists in building a new network whose nodes are now the communities found in the first phase. To do so, the weights of the links between the new nodes are given by the sum of the weight of the links between nodes in the corresponding two communities. Once this phase is complete it is possible to reapply the first phase creating bigger communities with increased modularity. The above two phases are executed until no modularity gain is achieved (or is less than the `threshold`). Be careful with self-loops in the input graph. These are treated as previously reduced communities -- as if the process had been started in the middle of the algorithm. Large self-loop edge weights thus represent strong communities and in practice may be hard to add other nodes to. If your input graph edge weights for self-loops do not represent already reduced communities you may want to remove the self-loops before inputting that graph. Parameters ---------- G : NetworkX graph weight : string or None, optional (default="weight") The name of an edge attribute that holds the numerical value used as a weight. If None then each edge has weight 1. resolution : float, optional (default=1) If resolution is less than 1, the algorithm favors larger communities. Greater than 1 favors smaller communities threshold : float, optional (default=0.0000001) Modularity gain threshold for each level. 
        If the gain of modularity between 2 levels of the algorithm is less than the
        given threshold then the algorithm stops and returns the resulting communities.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness`.

    Returns
    -------
    list
        A list of sets (partition of `G`). Each set represents one community
        and contains all the nodes that constitute it.

    Examples
    --------
    >>> import networkx as nx
    >>> G = nx.petersen_graph()
    >>> nx.community.louvain_communities(G, seed=123)
    [{0, 4, 5, 7, 9}, {1, 2, 3, 6, 8}]

    Notes
    -----
    The order in which the nodes are considered can affect the final output. In the
    algorithm the ordering happens using a random shuffle.

    References
    ----------
    .. [1] Blondel, V.D. et al. Fast unfolding of communities in
       large networks. J. Stat. Mech 10008, 1-12 (2008).
       https://doi.org/10.1088/1742-5468/2008/10/P10008
    .. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden:
       guaranteeing well-connected communities. Sci Rep 9, 5233 (2019).
       https://doi.org/10.1038/s41598-019-41695-z
    .. [3] Nicolas Dugué, Anthony Perez. Directed Louvain: maximizing modularity in
       directed networks. [Research Report] Université d’Orléans. 2015. hal-01231784.
       https://hal.archives-ouvertes.fr/hal-01231784

    See Also
    --------
    louvain_partitions
    """

    partitions = louvain_partitions(G, weight, resolution, threshold, seed)
    q = deque(partitions, maxlen=1)
    return q.pop()


@py_random_state("seed")
@nx._dispatch(edge_attrs="weight")
def louvain_partitions(
    G, weight="weight", resolution=1, threshold=0.0000001, seed=None
):
    """Yields partitions for each level of the Louvain Community Detection Algorithm.

    The Louvain Community Detection Algorithm is a simple method to extract the
    community structure of a network. It is a heuristic method based on modularity
    optimization. [1]_

    The partitions at each level (step of the algorithm) form a dendrogram of
    communities. A dendrogram is a diagram representing a tree in which each level
    represents a partition of the graph `G`. The top level contains the smallest
    communities, and as you traverse to the bottom of the tree the communities get
    bigger and the overall modularity increases, making the partition better.

    Each level is generated by executing the two phases of the Louvain Community
    Detection Algorithm.

    Be careful with self-loops in the input graph. These are treated as
    previously reduced communities -- as if the process had been started
    in the middle of the algorithm. Large self-loop edge weights thus
    represent strong communities and in practice may be hard to add
    other nodes to. If your input graph edge weights for self-loops
    do not represent already reduced communities you may want to remove
    the self-loops before inputting that graph.

    Parameters
    ----------
    G : NetworkX graph
    weight : string or None, optional (default="weight")
        The name of an edge attribute that holds the numerical value
        used as a weight. If None then each edge has weight 1.
    resolution : float, optional (default=1)
        If resolution is less than 1, the algorithm favors larger communities.
        Greater than 1 favors smaller communities.
    threshold : float, optional (default=0.0000001)
        Modularity gain threshold for each level. If the gain of modularity
        between 2 levels of the algorithm is less than the given threshold
        then the algorithm stops and returns the resulting communities.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness`.

    Yields
    ------
    list
        A list of sets (partition of `G`).
        Each set represents one community and contains all the nodes that
        constitute it.

    References
    ----------
    .. [1] Blondel, V.D. et al. Fast unfolding of communities in
       large networks. J. Stat. Mech 10008, 1-12 (2008)

    See Also
    --------
    louvain_communities
    """

    partition = [{u} for u in G.nodes()]
    if nx.is_empty(G):
        yield partition
        return
    mod = modularity(G, partition, resolution=resolution, weight=weight)
    is_directed = G.is_directed()
    if G.is_multigraph():
        graph = _convert_multigraph(G, weight, is_directed)
    else:
        graph = G.__class__()
        graph.add_nodes_from(G)
        graph.add_weighted_edges_from(G.edges(data=weight, default=1))

    m = graph.size(weight="weight")
    partition, inner_partition, improvement = _one_level(
        graph, m, partition, resolution, is_directed, seed
    )
    improvement = True
    while improvement:
        # gh-5901 protect the sets in the yielded list from further manipulation here
        yield [s.copy() for s in partition]
        new_mod = modularity(
            graph, inner_partition, resolution=resolution, weight="weight"
        )
        if new_mod - mod <= threshold:
            return
        mod = new_mod
        graph = _gen_graph(graph, inner_partition)
        partition, inner_partition, improvement = _one_level(
            graph, m, partition, resolution, is_directed, seed
        )


def _one_level(G, m, partition, resolution=1, is_directed=False, seed=None):
    """Calculate one level of the Louvain partitions tree.

    Parameters
    ----------
    G : NetworkX Graph/DiGraph
        The graph from which to detect communities
    m : number
        The size of the graph `G`.
    partition : list of sets of nodes
        A valid partition of the graph `G`
    resolution : positive number
        The resolution parameter for computing the modularity of a partition
    is_directed : bool
        True if `G` is a directed graph.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness`.
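
    Returns
    -------
    partition : list of sets of nodes
        The updated partition of the original graph's nodes, with empty
        communities removed.
    inner_partition : list of sets of nodes
        The partition of the nodes of the current (possibly aggregated) graph,
        with empty communities removed.
    improvement : bool
        True if at least one node was moved to a different community at this
        level.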
""" node2com = {u: i for i, u in enumerate(G.nodes())} inner_partition = [{u} for u in G.nodes()] if is_directed: in_degrees = dict(G.in_degree(weight="weight")) out_degrees = dict(G.out_degree(weight="weight")) Stot_in = list(in_degrees.values()) Stot_out = list(out_degrees.values()) # Calculate weights for both in and out neighbours without considering self-loops nbrs = {} for u in G: nbrs[u] = defaultdict(float) for _, n, wt in G.out_edges(u, data="weight"): if u != n: nbrs[u][n] += wt for n, _, wt in G.in_edges(u, data="weight"): if u != n: nbrs[u][n] += wt else: degrees = dict(G.degree(weight="weight")) Stot = list(degrees.values()) nbrs = {u: {v: data["weight"] for v, data in G[u].items() if v != u} for u in G} rand_nodes = list(G.nodes) seed.shuffle(rand_nodes) nb_moves = 1 improvement = False while nb_moves > 0: nb_moves = 0 for u in rand_nodes: best_mod = 0 best_com = node2com[u] weights2com = _neighbor_weights(nbrs[u], node2com) if is_directed: in_degree = in_degrees[u] out_degree = out_degrees[u] Stot_in[best_com] -= in_degree Stot_out[best_com] -= out_degree remove_cost = ( -weights2com[best_com] / m + resolution * (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com]) / m**2 ) else: degree = degrees[u] Stot[best_com] -= degree remove_cost = -weights2com[best_com] / m + resolution * ( Stot[best_com] * degree ) / (2 * m**2) for nbr_com, wt in weights2com.items(): if is_directed: gain = ( remove_cost + wt / m - resolution * ( out_degree * Stot_in[nbr_com] + in_degree * Stot_out[nbr_com] ) / m**2 ) else: gain = ( remove_cost + wt / m - resolution * (Stot[nbr_com] * degree) / (2 * m**2) ) if gain > best_mod: best_mod = gain best_com = nbr_com if is_directed: Stot_in[best_com] += in_degree Stot_out[best_com] += out_degree else: Stot[best_com] += degree if best_com != node2com[u]: com = G.nodes[u].get("nodes", {u}) partition[node2com[u]].difference_update(com) inner_partition[node2com[u]].remove(u) partition[best_com].update(com) inner_partition[best_com].add(u) improvement = True nb_moves += 1 node2com[u] = best_com partition = list(filter(len, partition)) inner_partition = list(filter(len, inner_partition)) return partition, inner_partition, improvement def _neighbor_weights(nbrs, node2com): """Calculate weights between node and its neighbor communities. Parameters ---------- nbrs : dictionary Dictionary with nodes' neighbours as keys and their edge weight as value. node2com : dictionary Dictionary with all graph's nodes as keys and their community index as value. """ weights = defaultdict(float) for nbr, wt in nbrs.items(): weights[node2com[nbr]] += wt return weights def _gen_graph(G, partition): """Generate a new graph based on the partitions of a given graph""" H = G.__class__() node2com = {} for i, part in enumerate(partition): nodes = set() for node in part: node2com[node] = i nodes.update(G.nodes[node].get("nodes", {node})) H.add_node(i, nodes=nodes) for node1, node2, wt in G.edges(data=True): wt = wt["weight"] com1 = node2com[node1] com2 = node2com[node2] temp = H.get_edge_data(com1, com2, {"weight": 0})["weight"] H.add_edge(com1, com2, weight=wt + temp) return H def _convert_multigraph(G, weight, is_directed): """Convert a Multigraph to normal Graph""" if is_directed: H = nx.DiGraph() else: H = nx.Graph() H.add_nodes_from(G) for u, v, wt in G.edges(data=weight, default=1): if H.has_edge(u, v): H[u][v]["weight"] += wt else: H.add_edge(u, v, weight=wt) return H