Spaces:
Running
Running
"""Generators for classes of graphs used in studying social networks.""" | |
import itertools | |
import math | |
import networkx as nx | |
from networkx.utils import py_random_state | |
__all__ = [ | |
"caveman_graph", | |
"connected_caveman_graph", | |
"relaxed_caveman_graph", | |
"random_partition_graph", | |
"planted_partition_graph", | |
"gaussian_random_partition_graph", | |
"ring_of_cliques", | |
"windmill_graph", | |
"stochastic_block_model", | |
"LFR_benchmark_graph", | |
] | |
def caveman_graph(l, k): | |
"""Returns a caveman graph of `l` cliques of size `k`. | |
Parameters | |
---------- | |
l : int | |
Number of cliques | |
k : int | |
Size of cliques | |
Returns | |
------- | |
G : NetworkX Graph | |
caveman graph | |
Notes | |
----- | |
This returns an undirected graph, it can be converted to a directed | |
graph using :func:`nx.to_directed`, or a multigraph using | |
``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is | |
described in [1]_ and it is unclear which of the directed | |
generalizations is most useful. | |
Examples | |
-------- | |
>>> G = nx.caveman_graph(3, 3) | |
See also | |
-------- | |
connected_caveman_graph | |
References | |
---------- | |
.. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.' | |
Amer. J. Soc. 105, 493-527, 1999. | |
""" | |
# l disjoint cliques of size k | |
G = nx.empty_graph(l * k) | |
if k > 1: | |
for start in range(0, l * k, k): | |
edges = itertools.combinations(range(start, start + k), 2) | |
G.add_edges_from(edges) | |
return G | |
def connected_caveman_graph(l, k): | |
"""Returns a connected caveman graph of `l` cliques of size `k`. | |
The connected caveman graph is formed by creating `n` cliques of size | |
`k`, then a single edge in each clique is rewired to a node in an | |
adjacent clique. | |
Parameters | |
---------- | |
l : int | |
number of cliques | |
k : int | |
size of cliques (k at least 2 or NetworkXError is raised) | |
Returns | |
------- | |
G : NetworkX Graph | |
connected caveman graph | |
Raises | |
------ | |
NetworkXError | |
If the size of cliques `k` is smaller than 2. | |
Notes | |
----- | |
This returns an undirected graph, it can be converted to a directed | |
graph using :func:`nx.to_directed`, or a multigraph using | |
``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is | |
described in [1]_ and it is unclear which of the directed | |
generalizations is most useful. | |
Examples | |
-------- | |
>>> G = nx.connected_caveman_graph(3, 3) | |
References | |
---------- | |
.. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.' | |
Amer. J. Soc. 105, 493-527, 1999. | |
""" | |
if k < 2: | |
raise nx.NetworkXError( | |
"The size of cliques in a connected caveman graph " "must be at least 2." | |
) | |
G = nx.caveman_graph(l, k) | |
for start in range(0, l * k, k): | |
G.remove_edge(start, start + 1) | |
G.add_edge(start, (start - 1) % (l * k)) | |
return G | |
def relaxed_caveman_graph(l, k, p, seed=None): | |
"""Returns a relaxed caveman graph. | |
A relaxed caveman graph starts with `l` cliques of size `k`. Edges are | |
then randomly rewired with probability `p` to link different cliques. | |
Parameters | |
---------- | |
l : int | |
Number of groups | |
k : int | |
Size of cliques | |
p : float | |
Probability of rewiring each edge. | |
seed : integer, random_state, or None (default) | |
Indicator of random number generation state. | |
See :ref:`Randomness<randomness>`. | |
Returns | |
------- | |
G : NetworkX Graph | |
Relaxed Caveman Graph | |
Raises | |
------ | |
NetworkXError | |
If p is not in [0,1] | |
Examples | |
-------- | |
>>> G = nx.relaxed_caveman_graph(2, 3, 0.1, seed=42) | |
References | |
---------- | |
.. [1] Santo Fortunato, Community Detection in Graphs, | |
Physics Reports Volume 486, Issues 3-5, February 2010, Pages 75-174. | |
https://arxiv.org/abs/0906.0612 | |
""" | |
G = nx.caveman_graph(l, k) | |
nodes = list(G) | |
for u, v in G.edges(): | |
if seed.random() < p: # rewire the edge | |
x = seed.choice(nodes) | |
if G.has_edge(u, x): | |
continue | |
G.remove_edge(u, v) | |
G.add_edge(u, x) | |
return G | |
def random_partition_graph(sizes, p_in, p_out, seed=None, directed=False): | |
"""Returns the random partition graph with a partition of sizes. | |
A partition graph is a graph of communities with sizes defined by | |
s in sizes. Nodes in the same group are connected with probability | |
p_in and nodes of different groups are connected with probability | |
p_out. | |
Parameters | |
---------- | |
sizes : list of ints | |
Sizes of groups | |
p_in : float | |
probability of edges with in groups | |
p_out : float | |
probability of edges between groups | |
directed : boolean optional, default=False | |
Whether to create a directed graph | |
seed : integer, random_state, or None (default) | |
Indicator of random number generation state. | |
See :ref:`Randomness<randomness>`. | |
Returns | |
------- | |
G : NetworkX Graph or DiGraph | |
random partition graph of size sum(gs) | |
Raises | |
------ | |
NetworkXError | |
If p_in or p_out is not in [0,1] | |
Examples | |
-------- | |
>>> G = nx.random_partition_graph([10, 10, 10], 0.25, 0.01) | |
>>> len(G) | |
30 | |
>>> partition = G.graph["partition"] | |
>>> len(partition) | |
3 | |
Notes | |
----- | |
This is a generalization of the planted-l-partition described in | |
[1]_. It allows for the creation of groups of any size. | |
The partition is store as a graph attribute 'partition'. | |
References | |
---------- | |
.. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports | |
Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612 | |
""" | |
# Use geometric method for O(n+m) complexity algorithm | |
# partition = nx.community_sets(nx.get_node_attributes(G, 'affiliation')) | |
if not 0.0 <= p_in <= 1.0: | |
raise nx.NetworkXError("p_in must be in [0,1]") | |
if not 0.0 <= p_out <= 1.0: | |
raise nx.NetworkXError("p_out must be in [0,1]") | |
# create connection matrix | |
num_blocks = len(sizes) | |
p = [[p_out for s in range(num_blocks)] for r in range(num_blocks)] | |
for r in range(num_blocks): | |
p[r][r] = p_in | |
return stochastic_block_model( | |
sizes, | |
p, | |
nodelist=None, | |
seed=seed, | |
directed=directed, | |
selfloops=False, | |
sparse=True, | |
) | |
def planted_partition_graph(l, k, p_in, p_out, seed=None, directed=False): | |
"""Returns the planted l-partition graph. | |
This model partitions a graph with n=l*k vertices in | |
l groups with k vertices each. Vertices of the same | |
group are linked with a probability p_in, and vertices | |
of different groups are linked with probability p_out. | |
Parameters | |
---------- | |
l : int | |
Number of groups | |
k : int | |
Number of vertices in each group | |
p_in : float | |
probability of connecting vertices within a group | |
p_out : float | |
probability of connected vertices between groups | |
seed : integer, random_state, or None (default) | |
Indicator of random number generation state. | |
See :ref:`Randomness<randomness>`. | |
directed : bool,optional (default=False) | |
If True return a directed graph | |
Returns | |
------- | |
G : NetworkX Graph or DiGraph | |
planted l-partition graph | |
Raises | |
------ | |
NetworkXError | |
If p_in,p_out are not in [0,1] or | |
Examples | |
-------- | |
>>> G = nx.planted_partition_graph(4, 3, 0.5, 0.1, seed=42) | |
See Also | |
-------- | |
random_partition_model | |
References | |
---------- | |
.. [1] A. Condon, R.M. Karp, Algorithms for graph partitioning | |
on the planted partition model, | |
Random Struct. Algor. 18 (2001) 116-140. | |
.. [2] Santo Fortunato 'Community Detection in Graphs' Physical Reports | |
Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612 | |
""" | |
return random_partition_graph([k] * l, p_in, p_out, seed=seed, directed=directed) | |
def gaussian_random_partition_graph(n, s, v, p_in, p_out, directed=False, seed=None): | |
"""Generate a Gaussian random partition graph. | |
A Gaussian random partition graph is created by creating k partitions | |
each with a size drawn from a normal distribution with mean s and variance | |
s/v. Nodes are connected within clusters with probability p_in and | |
between clusters with probability p_out[1] | |
Parameters | |
---------- | |
n : int | |
Number of nodes in the graph | |
s : float | |
Mean cluster size | |
v : float | |
Shape parameter. The variance of cluster size distribution is s/v. | |
p_in : float | |
Probability of intra cluster connection. | |
p_out : float | |
Probability of inter cluster connection. | |
directed : boolean, optional default=False | |
Whether to create a directed graph or not | |
seed : integer, random_state, or None (default) | |
Indicator of random number generation state. | |
See :ref:`Randomness<randomness>`. | |
Returns | |
------- | |
G : NetworkX Graph or DiGraph | |
gaussian random partition graph | |
Raises | |
------ | |
NetworkXError | |
If s is > n | |
If p_in or p_out is not in [0,1] | |
Notes | |
----- | |
Note the number of partitions is dependent on s,v and n, and that the | |
last partition may be considerably smaller, as it is sized to simply | |
fill out the nodes [1] | |
See Also | |
-------- | |
random_partition_graph | |
Examples | |
-------- | |
>>> G = nx.gaussian_random_partition_graph(100, 10, 10, 0.25, 0.1) | |
>>> len(G) | |
100 | |
References | |
---------- | |
.. [1] Ulrik Brandes, Marco Gaertler, Dorothea Wagner, | |
Experiments on Graph Clustering Algorithms, | |
In the proceedings of the 11th Europ. Symp. Algorithms, 2003. | |
""" | |
if s > n: | |
raise nx.NetworkXError("s must be <= n") | |
assigned = 0 | |
sizes = [] | |
while True: | |
size = int(seed.gauss(s, s / v + 0.5)) | |
if size < 1: # how to handle 0 or negative sizes? | |
continue | |
if assigned + size >= n: | |
sizes.append(n - assigned) | |
break | |
assigned += size | |
sizes.append(size) | |
return random_partition_graph(sizes, p_in, p_out, seed=seed, directed=directed) | |
def ring_of_cliques(num_cliques, clique_size): | |
"""Defines a "ring of cliques" graph. | |
A ring of cliques graph is consisting of cliques, connected through single | |
links. Each clique is a complete graph. | |
Parameters | |
---------- | |
num_cliques : int | |
Number of cliques | |
clique_size : int | |
Size of cliques | |
Returns | |
------- | |
G : NetworkX Graph | |
ring of cliques graph | |
Raises | |
------ | |
NetworkXError | |
If the number of cliques is lower than 2 or | |
if the size of cliques is smaller than 2. | |
Examples | |
-------- | |
>>> G = nx.ring_of_cliques(8, 4) | |
See Also | |
-------- | |
connected_caveman_graph | |
Notes | |
----- | |
The `connected_caveman_graph` graph removes a link from each clique to | |
connect it with the next clique. Instead, the `ring_of_cliques` graph | |
simply adds the link without removing any link from the cliques. | |
""" | |
if num_cliques < 2: | |
raise nx.NetworkXError("A ring of cliques must have at least " "two cliques") | |
if clique_size < 2: | |
raise nx.NetworkXError("The cliques must have at least two nodes") | |
G = nx.Graph() | |
for i in range(num_cliques): | |
edges = itertools.combinations( | |
range(i * clique_size, i * clique_size + clique_size), 2 | |
) | |
G.add_edges_from(edges) | |
G.add_edge( | |
i * clique_size + 1, (i + 1) * clique_size % (num_cliques * clique_size) | |
) | |
return G | |
def windmill_graph(n, k): | |
"""Generate a windmill graph. | |
A windmill graph is a graph of `n` cliques each of size `k` that are all | |
joined at one node. | |
It can be thought of as taking a disjoint union of `n` cliques of size `k`, | |
selecting one point from each, and contracting all of the selected points. | |
Alternatively, one could generate `n` cliques of size `k-1` and one node | |
that is connected to all other nodes in the graph. | |
Parameters | |
---------- | |
n : int | |
Number of cliques | |
k : int | |
Size of cliques | |
Returns | |
------- | |
G : NetworkX Graph | |
windmill graph with n cliques of size k | |
Raises | |
------ | |
NetworkXError | |
If the number of cliques is less than two | |
If the size of the cliques are less than two | |
Examples | |
-------- | |
>>> G = nx.windmill_graph(4, 5) | |
Notes | |
----- | |
The node labeled `0` will be the node connected to all other nodes. | |
Note that windmill graphs are usually denoted `Wd(k,n)`, so the parameters | |
are in the opposite order as the parameters of this method. | |
""" | |
if n < 2: | |
msg = "A windmill graph must have at least two cliques" | |
raise nx.NetworkXError(msg) | |
if k < 2: | |
raise nx.NetworkXError("The cliques must have at least two nodes") | |
G = nx.disjoint_union_all( | |
itertools.chain( | |
[nx.complete_graph(k)], (nx.complete_graph(k - 1) for _ in range(n - 1)) | |
) | |
) | |
G.add_edges_from((0, i) for i in range(k, G.number_of_nodes())) | |
return G | |
def stochastic_block_model( | |
sizes, p, nodelist=None, seed=None, directed=False, selfloops=False, sparse=True | |
): | |
"""Returns a stochastic block model graph. | |
This model partitions the nodes in blocks of arbitrary sizes, and places | |
edges between pairs of nodes independently, with a probability that depends | |
on the blocks. | |
Parameters | |
---------- | |
sizes : list of ints | |
Sizes of blocks | |
p : list of list of floats | |
Element (r,s) gives the density of edges going from the nodes | |
of group r to nodes of group s. | |
p must match the number of groups (len(sizes) == len(p)), | |
and it must be symmetric if the graph is undirected. | |
nodelist : list, optional | |
The block tags are assigned according to the node identifiers | |
in nodelist. If nodelist is None, then the ordering is the | |
range [0,sum(sizes)-1]. | |
seed : integer, random_state, or None (default) | |
Indicator of random number generation state. | |
See :ref:`Randomness<randomness>`. | |
directed : boolean optional, default=False | |
Whether to create a directed graph or not. | |
selfloops : boolean optional, default=False | |
Whether to include self-loops or not. | |
sparse: boolean optional, default=True | |
Use the sparse heuristic to speed up the generator. | |
Returns | |
------- | |
g : NetworkX Graph or DiGraph | |
Stochastic block model graph of size sum(sizes) | |
Raises | |
------ | |
NetworkXError | |
If probabilities are not in [0,1]. | |
If the probability matrix is not square (directed case). | |
If the probability matrix is not symmetric (undirected case). | |
If the sizes list does not match nodelist or the probability matrix. | |
If nodelist contains duplicate. | |
Examples | |
-------- | |
>>> sizes = [75, 75, 300] | |
>>> probs = [[0.25, 0.05, 0.02], [0.05, 0.35, 0.07], [0.02, 0.07, 0.40]] | |
>>> g = nx.stochastic_block_model(sizes, probs, seed=0) | |
>>> len(g) | |
450 | |
>>> H = nx.quotient_graph(g, g.graph["partition"], relabel=True) | |
>>> for v in H.nodes(data=True): | |
... print(round(v[1]["density"], 3)) | |
... | |
0.245 | |
0.348 | |
0.405 | |
>>> for v in H.edges(data=True): | |
... print(round(1.0 * v[2]["weight"] / (sizes[v[0]] * sizes[v[1]]), 3)) | |
... | |
0.051 | |
0.022 | |
0.07 | |
See Also | |
-------- | |
random_partition_graph | |
planted_partition_graph | |
gaussian_random_partition_graph | |
gnp_random_graph | |
References | |
---------- | |
.. [1] Holland, P. W., Laskey, K. B., & Leinhardt, S., | |
"Stochastic blockmodels: First steps", | |
Social networks, 5(2), 109-137, 1983. | |
""" | |
# Check if dimensions match | |
if len(sizes) != len(p): | |
raise nx.NetworkXException("'sizes' and 'p' do not match.") | |
# Check for probability symmetry (undirected) and shape (directed) | |
for row in p: | |
if len(p) != len(row): | |
raise nx.NetworkXException("'p' must be a square matrix.") | |
if not directed: | |
p_transpose = [list(i) for i in zip(*p)] | |
for i in zip(p, p_transpose): | |
for j in zip(i[0], i[1]): | |
if abs(j[0] - j[1]) > 1e-08: | |
raise nx.NetworkXException("'p' must be symmetric.") | |
# Check for probability range | |
for row in p: | |
for prob in row: | |
if prob < 0 or prob > 1: | |
raise nx.NetworkXException("Entries of 'p' not in [0,1].") | |
# Check for nodelist consistency | |
if nodelist is not None: | |
if len(nodelist) != sum(sizes): | |
raise nx.NetworkXException("'nodelist' and 'sizes' do not match.") | |
if len(nodelist) != len(set(nodelist)): | |
raise nx.NetworkXException("nodelist contains duplicate.") | |
else: | |
nodelist = range(sum(sizes)) | |
# Setup the graph conditionally to the directed switch. | |
block_range = range(len(sizes)) | |
if directed: | |
g = nx.DiGraph() | |
block_iter = itertools.product(block_range, block_range) | |
else: | |
g = nx.Graph() | |
block_iter = itertools.combinations_with_replacement(block_range, 2) | |
# Split nodelist in a partition (list of sets). | |
size_cumsum = [sum(sizes[0:x]) for x in range(len(sizes) + 1)] | |
g.graph["partition"] = [ | |
set(nodelist[size_cumsum[x] : size_cumsum[x + 1]]) | |
for x in range(len(size_cumsum) - 1) | |
] | |
# Setup nodes and graph name | |
for block_id, nodes in enumerate(g.graph["partition"]): | |
for node in nodes: | |
g.add_node(node, block=block_id) | |
g.name = "stochastic_block_model" | |
# Test for edge existence | |
parts = g.graph["partition"] | |
for i, j in block_iter: | |
if i == j: | |
if directed: | |
if selfloops: | |
edges = itertools.product(parts[i], parts[i]) | |
else: | |
edges = itertools.permutations(parts[i], 2) | |
else: | |
edges = itertools.combinations(parts[i], 2) | |
if selfloops: | |
edges = itertools.chain(edges, zip(parts[i], parts[i])) | |
for e in edges: | |
if seed.random() < p[i][j]: | |
g.add_edge(*e) | |
else: | |
edges = itertools.product(parts[i], parts[j]) | |
if sparse: | |
if p[i][j] == 1: # Test edges cases p_ij = 0 or 1 | |
for e in edges: | |
g.add_edge(*e) | |
elif p[i][j] > 0: | |
while True: | |
try: | |
logrand = math.log(seed.random()) | |
skip = math.floor(logrand / math.log(1 - p[i][j])) | |
# consume "skip" edges | |
next(itertools.islice(edges, skip, skip), None) | |
e = next(edges) | |
g.add_edge(*e) # __safe | |
except StopIteration: | |
break | |
else: | |
for e in edges: | |
if seed.random() < p[i][j]: | |
g.add_edge(*e) # __safe | |
return g | |
def _zipf_rv_below(gamma, xmin, threshold, seed): | |
"""Returns a random value chosen from the bounded Zipf distribution. | |
Repeatedly draws values from the Zipf distribution until the | |
threshold is met, then returns that value. | |
""" | |
result = nx.utils.zipf_rv(gamma, xmin, seed) | |
while result > threshold: | |
result = nx.utils.zipf_rv(gamma, xmin, seed) | |
return result | |
def _powerlaw_sequence(gamma, low, high, condition, length, max_iters, seed): | |
"""Returns a list of numbers obeying a constrained power law distribution. | |
``gamma`` and ``low`` are the parameters for the Zipf distribution. | |
``high`` is the maximum allowed value for values draw from the Zipf | |
distribution. For more information, see :func:`_zipf_rv_below`. | |
``condition`` and ``length`` are Boolean-valued functions on | |
lists. While generating the list, random values are drawn and | |
appended to the list until ``length`` is satisfied by the created | |
list. Once ``condition`` is satisfied, the sequence generated in | |
this way is returned. | |
``max_iters`` indicates the number of times to generate a list | |
satisfying ``length``. If the number of iterations exceeds this | |
value, :exc:`~networkx.exception.ExceededMaxIterations` is raised. | |
seed : integer, random_state, or None (default) | |
Indicator of random number generation state. | |
See :ref:`Randomness<randomness>`. | |
""" | |
for i in range(max_iters): | |
seq = [] | |
while not length(seq): | |
seq.append(_zipf_rv_below(gamma, low, high, seed)) | |
if condition(seq): | |
return seq | |
raise nx.ExceededMaxIterations("Could not create power law sequence") | |
def _hurwitz_zeta(x, q, tolerance): | |
"""The Hurwitz zeta function, or the Riemann zeta function of two arguments. | |
``x`` must be greater than one and ``q`` must be positive. | |
This function repeatedly computes subsequent partial sums until | |
convergence, as decided by ``tolerance``. | |
""" | |
z = 0 | |
z_prev = -float("inf") | |
k = 0 | |
while abs(z - z_prev) > tolerance: | |
z_prev = z | |
z += 1 / ((k + q) ** x) | |
k += 1 | |
return z | |
def _generate_min_degree(gamma, average_degree, max_degree, tolerance, max_iters): | |
"""Returns a minimum degree from the given average degree.""" | |
# Defines zeta function whether or not Scipy is available | |
try: | |
from scipy.special import zeta | |
except ImportError: | |
def zeta(x, q): | |
return _hurwitz_zeta(x, q, tolerance) | |
min_deg_top = max_degree | |
min_deg_bot = 1 | |
min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot | |
itrs = 0 | |
mid_avg_deg = 0 | |
while abs(mid_avg_deg - average_degree) > tolerance: | |
if itrs > max_iters: | |
raise nx.ExceededMaxIterations("Could not match average_degree") | |
mid_avg_deg = 0 | |
for x in range(int(min_deg_mid), max_degree + 1): | |
mid_avg_deg += (x ** (-gamma + 1)) / zeta(gamma, min_deg_mid) | |
if mid_avg_deg > average_degree: | |
min_deg_top = min_deg_mid | |
min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot | |
else: | |
min_deg_bot = min_deg_mid | |
min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot | |
itrs += 1 | |
# return int(min_deg_mid + 0.5) | |
return round(min_deg_mid) | |
def _generate_communities(degree_seq, community_sizes, mu, max_iters, seed): | |
"""Returns a list of sets, each of which represents a community. | |
``degree_seq`` is the degree sequence that must be met by the | |
graph. | |
``community_sizes`` is the community size distribution that must be | |
met by the generated list of sets. | |
``mu`` is a float in the interval [0, 1] indicating the fraction of | |
intra-community edges incident to each node. | |
``max_iters`` is the number of times to try to add a node to a | |
community. This must be greater than the length of | |
``degree_seq``, otherwise this function will always fail. If | |
the number of iterations exceeds this value, | |
:exc:`~networkx.exception.ExceededMaxIterations` is raised. | |
seed : integer, random_state, or None (default) | |
Indicator of random number generation state. | |
See :ref:`Randomness<randomness>`. | |
The communities returned by this are sets of integers in the set {0, | |
..., *n* - 1}, where *n* is the length of ``degree_seq``. | |
""" | |
# This assumes the nodes in the graph will be natural numbers. | |
result = [set() for _ in community_sizes] | |
n = len(degree_seq) | |
free = list(range(n)) | |
for i in range(max_iters): | |
v = free.pop() | |
c = seed.choice(range(len(community_sizes))) | |
# s = int(degree_seq[v] * (1 - mu) + 0.5) | |
s = round(degree_seq[v] * (1 - mu)) | |
# If the community is large enough, add the node to the chosen | |
# community. Otherwise, return it to the list of unaffiliated | |
# nodes. | |
if s < community_sizes[c]: | |
result[c].add(v) | |
else: | |
free.append(v) | |
# If the community is too big, remove a node from it. | |
if len(result[c]) > community_sizes[c]: | |
free.append(result[c].pop()) | |
if not free: | |
return result | |
msg = "Could not assign communities; try increasing min_community" | |
raise nx.ExceededMaxIterations(msg) | |
def LFR_benchmark_graph( | |
n, | |
tau1, | |
tau2, | |
mu, | |
average_degree=None, | |
min_degree=None, | |
max_degree=None, | |
min_community=None, | |
max_community=None, | |
tol=1.0e-7, | |
max_iters=500, | |
seed=None, | |
): | |
r"""Returns the LFR benchmark graph. | |
This algorithm proceeds as follows: | |
1) Find a degree sequence with a power law distribution, and minimum | |
value ``min_degree``, which has approximate average degree | |
``average_degree``. This is accomplished by either | |
a) specifying ``min_degree`` and not ``average_degree``, | |
b) specifying ``average_degree`` and not ``min_degree``, in which | |
case a suitable minimum degree will be found. | |
``max_degree`` can also be specified, otherwise it will be set to | |
``n``. Each node *u* will have $\mu \mathrm{deg}(u)$ edges | |
joining it to nodes in communities other than its own and $(1 - | |
\mu) \mathrm{deg}(u)$ edges joining it to nodes in its own | |
community. | |
2) Generate community sizes according to a power law distribution | |
with exponent ``tau2``. If ``min_community`` and | |
``max_community`` are not specified they will be selected to be | |
``min_degree`` and ``max_degree``, respectively. Community sizes | |
are generated until the sum of their sizes equals ``n``. | |
3) Each node will be randomly assigned a community with the | |
condition that the community is large enough for the node's | |
intra-community degree, $(1 - \mu) \mathrm{deg}(u)$ as | |
described in step 2. If a community grows too large, a random node | |
will be selected for reassignment to a new community, until all | |
nodes have been assigned a community. | |
4) Each node *u* then adds $(1 - \mu) \mathrm{deg}(u)$ | |
intra-community edges and $\mu \mathrm{deg}(u)$ inter-community | |
edges. | |
Parameters | |
---------- | |
n : int | |
Number of nodes in the created graph. | |
tau1 : float | |
Power law exponent for the degree distribution of the created | |
graph. This value must be strictly greater than one. | |
tau2 : float | |
Power law exponent for the community size distribution in the | |
created graph. This value must be strictly greater than one. | |
mu : float | |
Fraction of inter-community edges incident to each node. This | |
value must be in the interval [0, 1]. | |
average_degree : float | |
Desired average degree of nodes in the created graph. This value | |
must be in the interval [0, *n*]. Exactly one of this and | |
``min_degree`` must be specified, otherwise a | |
:exc:`NetworkXError` is raised. | |
min_degree : int | |
Minimum degree of nodes in the created graph. This value must be | |
in the interval [0, *n*]. Exactly one of this and | |
``average_degree`` must be specified, otherwise a | |
:exc:`NetworkXError` is raised. | |
max_degree : int | |
Maximum degree of nodes in the created graph. If not specified, | |
this is set to ``n``, the total number of nodes in the graph. | |
min_community : int | |
Minimum size of communities in the graph. If not specified, this | |
is set to ``min_degree``. | |
max_community : int | |
Maximum size of communities in the graph. If not specified, this | |
is set to ``n``, the total number of nodes in the graph. | |
tol : float | |
Tolerance when comparing floats, specifically when comparing | |
average degree values. | |
max_iters : int | |
Maximum number of iterations to try to create the community sizes, | |
degree distribution, and community affiliations. | |
seed : integer, random_state, or None (default) | |
Indicator of random number generation state. | |
See :ref:`Randomness<randomness>`. | |
Returns | |
------- | |
G : NetworkX graph | |
The LFR benchmark graph generated according to the specified | |
parameters. | |
Each node in the graph has a node attribute ``'community'`` that | |
stores the community (that is, the set of nodes) that includes | |
it. | |
Raises | |
------ | |
NetworkXError | |
If any of the parameters do not meet their upper and lower bounds: | |
- ``tau1`` and ``tau2`` must be strictly greater than 1. | |
- ``mu`` must be in [0, 1]. | |
- ``max_degree`` must be in {1, ..., *n*}. | |
- ``min_community`` and ``max_community`` must be in {0, ..., | |
*n*}. | |
If not exactly one of ``average_degree`` and ``min_degree`` is | |
specified. | |
If ``min_degree`` is not specified and a suitable ``min_degree`` | |
cannot be found. | |
ExceededMaxIterations | |
If a valid degree sequence cannot be created within | |
``max_iters`` number of iterations. | |
If a valid set of community sizes cannot be created within | |
``max_iters`` number of iterations. | |
If a valid community assignment cannot be created within ``10 * | |
n * max_iters`` number of iterations. | |
Examples | |
-------- | |
Basic usage:: | |
>>> from networkx.generators.community import LFR_benchmark_graph | |
>>> n = 250 | |
>>> tau1 = 3 | |
>>> tau2 = 1.5 | |
>>> mu = 0.1 | |
>>> G = LFR_benchmark_graph( | |
... n, tau1, tau2, mu, average_degree=5, min_community=20, seed=10 | |
... ) | |
Continuing the example above, you can get the communities from the | |
node attributes of the graph:: | |
>>> communities = {frozenset(G.nodes[v]["community"]) for v in G} | |
Notes | |
----- | |
This algorithm differs slightly from the original way it was | |
presented in [1]. | |
1) Rather than connecting the graph via a configuration model then | |
rewiring to match the intra-community and inter-community | |
degrees, we do this wiring explicitly at the end, which should be | |
equivalent. | |
2) The code posted on the author's website [2] calculates the random | |
power law distributed variables and their average using | |
continuous approximations, whereas we use the discrete | |
distributions here as both degree and community size are | |
discrete. | |
Though the authors describe the algorithm as quite robust, testing | |
during development indicates that a somewhat narrower parameter set | |
is likely to successfully produce a graph. Some suggestions have | |
been provided in the event of exceptions. | |
References | |
---------- | |
.. [1] "Benchmark graphs for testing community detection algorithms", | |
Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi, | |
Phys. Rev. E 78, 046110 2008 | |
.. [2] https://www.santofortunato.net/resources | |
""" | |
# Perform some basic parameter validation. | |
if not tau1 > 1: | |
raise nx.NetworkXError("tau1 must be greater than one") | |
if not tau2 > 1: | |
raise nx.NetworkXError("tau2 must be greater than one") | |
if not 0 <= mu <= 1: | |
raise nx.NetworkXError("mu must be in the interval [0, 1]") | |
# Validate parameters for generating the degree sequence. | |
if max_degree is None: | |
max_degree = n | |
elif not 0 < max_degree <= n: | |
raise nx.NetworkXError("max_degree must be in the interval (0, n]") | |
if not ((min_degree is None) ^ (average_degree is None)): | |
raise nx.NetworkXError( | |
"Must assign exactly one of min_degree and" " average_degree" | |
) | |
if min_degree is None: | |
min_degree = _generate_min_degree( | |
tau1, average_degree, max_degree, tol, max_iters | |
) | |
# Generate a degree sequence with a power law distribution. | |
low, high = min_degree, max_degree | |
def condition(seq): | |
return sum(seq) % 2 == 0 | |
def length(seq): | |
return len(seq) >= n | |
deg_seq = _powerlaw_sequence(tau1, low, high, condition, length, max_iters, seed) | |
# Validate parameters for generating the community size sequence. | |
if min_community is None: | |
min_community = min(deg_seq) | |
if max_community is None: | |
max_community = max(deg_seq) | |
# Generate a community size sequence with a power law distribution. | |
# | |
# TODO The original code incremented the number of iterations each | |
# time a new Zipf random value was drawn from the distribution. This | |
# differed from the way the number of iterations was incremented in | |
# `_powerlaw_degree_sequence`, so this code was changed to match | |
# that one. As a result, this code is allowed many more chances to | |
# generate a valid community size sequence. | |
low, high = min_community, max_community | |
def condition(seq): | |
return sum(seq) == n | |
def length(seq): | |
return sum(seq) >= n | |
comms = _powerlaw_sequence(tau2, low, high, condition, length, max_iters, seed) | |
# Generate the communities based on the given degree sequence and | |
# community sizes. | |
max_iters *= 10 * n | |
communities = _generate_communities(deg_seq, comms, mu, max_iters, seed) | |
# Finally, generate the benchmark graph based on the given | |
# communities, joining nodes according to the intra- and | |
# inter-community degrees. | |
G = nx.Graph() | |
G.add_nodes_from(range(n)) | |
for c in communities: | |
for u in c: | |
while G.degree(u) < round(deg_seq[u] * (1 - mu)): | |
v = seed.choice(list(c)) | |
G.add_edge(u, v) | |
while G.degree(u) < deg_seq[u]: | |
v = seed.choice(range(n)) | |
if v not in c: | |
G.add_edge(u, v) | |
G.nodes[u]["community"] = c | |
return G | |