Spaces:
Running
Running
File size: 8,086 Bytes
b200bda |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
"""Lukes Algorithm for exact optimal weighted tree partitioning."""
from copy import deepcopy
from functools import lru_cache
from random import choice
import networkx as nx
from networkx.utils import not_implemented_for
__all__ = ["lukes_partitioning"]
D_EDGE_W = "weight"
D_EDGE_VALUE = 1.0
D_NODE_W = "weight"
D_NODE_VALUE = 1
PKEY = "partitions"
CLUSTER_EVAL_CACHE_SIZE = 2048
def _split_n_from(n, min_size_of_first_part):
# splits j in two parts of which the first is at least
# the second argument
assert n >= min_size_of_first_part
for p1 in range(min_size_of_first_part, n + 1):
yield p1, n - p1
@nx._dispatch(node_attrs="node_weight", edge_attrs="edge_weight")
def lukes_partitioning(G, max_size, node_weight=None, edge_weight=None):
"""Optimal partitioning of a weighted tree using the Lukes algorithm.
This algorithm partitions a connected, acyclic graph featuring integer
node weights and float edge weights. The resulting clusters are such
that the total weight of the nodes in each cluster does not exceed
max_size and that the weight of the edges that are cut by the partition
is minimum. The algorithm is based on [1]_.
Parameters
----------
G : NetworkX graph
max_size : int
Maximum weight a partition can have in terms of sum of
node_weight for all nodes in the partition
edge_weight : key
Edge data key to use as weight. If None, the weights are all
set to one.
node_weight : key
Node data key to use as weight. If None, the weights are all
set to one. The data must be int.
Returns
-------
partition : list
A list of sets of nodes representing the clusters of the
partition.
Raises
------
NotATree
If G is not a tree.
TypeError
If any of the values of node_weight is not int.
References
----------
.. [1] Lukes, J. A. (1974).
"Efficient Algorithm for the Partitioning of Trees."
IBM Journal of Research and Development, 18(3), 217–224.
"""
# First sanity check and tree preparation
if not nx.is_tree(G):
raise nx.NotATree("lukes_partitioning works only on trees")
else:
if nx.is_directed(G):
root = [n for n, d in G.in_degree() if d == 0]
assert len(root) == 1
root = root[0]
t_G = deepcopy(G)
else:
root = choice(list(G.nodes))
# this has the desirable side effect of not inheriting attributes
t_G = nx.dfs_tree(G, root)
# Since we do not want to screw up the original graph,
# if we have a blank attribute, we make a deepcopy
if edge_weight is None or node_weight is None:
safe_G = deepcopy(G)
if edge_weight is None:
nx.set_edge_attributes(safe_G, D_EDGE_VALUE, D_EDGE_W)
edge_weight = D_EDGE_W
if node_weight is None:
nx.set_node_attributes(safe_G, D_NODE_VALUE, D_NODE_W)
node_weight = D_NODE_W
else:
safe_G = G
# Second sanity check
# The values of node_weight MUST BE int.
# I cannot see any room for duck typing without incurring serious
# danger of subtle bugs.
all_n_attr = nx.get_node_attributes(safe_G, node_weight).values()
for x in all_n_attr:
if not isinstance(x, int):
raise TypeError(
"lukes_partitioning needs integer "
f"values for node_weight ({node_weight})"
)
# SUBROUTINES -----------------------
# these functions are defined here for two reasons:
# - brevity: we can leverage global "safe_G"
# - caching: signatures are hashable
@not_implemented_for("undirected")
# this is intended to be called only on t_G
def _leaves(gr):
for x in gr.nodes:
if not nx.descendants(gr, x):
yield x
@not_implemented_for("undirected")
def _a_parent_of_leaves_only(gr):
tleaves = set(_leaves(gr))
for n in set(gr.nodes) - tleaves:
if all(x in tleaves for x in nx.descendants(gr, n)):
return n
@lru_cache(CLUSTER_EVAL_CACHE_SIZE)
def _value_of_cluster(cluster):
valid_edges = [e for e in safe_G.edges if e[0] in cluster and e[1] in cluster]
return sum(safe_G.edges[e][edge_weight] for e in valid_edges)
def _value_of_partition(partition):
return sum(_value_of_cluster(frozenset(c)) for c in partition)
@lru_cache(CLUSTER_EVAL_CACHE_SIZE)
def _weight_of_cluster(cluster):
return sum(safe_G.nodes[n][node_weight] for n in cluster)
def _pivot(partition, node):
ccx = [c for c in partition if node in c]
assert len(ccx) == 1
return ccx[0]
def _concatenate_or_merge(partition_1, partition_2, x, i, ref_weight):
ccx = _pivot(partition_1, x)
cci = _pivot(partition_2, i)
merged_xi = ccx.union(cci)
# We first check if we can do the merge.
# If so, we do the actual calculations, otherwise we concatenate
if _weight_of_cluster(frozenset(merged_xi)) <= ref_weight:
cp1 = list(filter(lambda x: x != ccx, partition_1))
cp2 = list(filter(lambda x: x != cci, partition_2))
option_2 = [merged_xi] + cp1 + cp2
return option_2, _value_of_partition(option_2)
else:
option_1 = partition_1 + partition_2
return option_1, _value_of_partition(option_1)
# INITIALIZATION -----------------------
leaves = set(_leaves(t_G))
for lv in leaves:
t_G.nodes[lv][PKEY] = {}
slot = safe_G.nodes[lv][node_weight]
t_G.nodes[lv][PKEY][slot] = [{lv}]
t_G.nodes[lv][PKEY][0] = [{lv}]
for inner in [x for x in t_G.nodes if x not in leaves]:
t_G.nodes[inner][PKEY] = {}
slot = safe_G.nodes[inner][node_weight]
t_G.nodes[inner][PKEY][slot] = [{inner}]
# CORE ALGORITHM -----------------------
while True:
x_node = _a_parent_of_leaves_only(t_G)
weight_of_x = safe_G.nodes[x_node][node_weight]
best_value = 0
best_partition = None
bp_buffer = {}
x_descendants = nx.descendants(t_G, x_node)
for i_node in x_descendants:
for j in range(weight_of_x, max_size + 1):
for a, b in _split_n_from(j, weight_of_x):
if (
a not in t_G.nodes[x_node][PKEY]
or b not in t_G.nodes[i_node][PKEY]
):
# it's not possible to form this particular weight sum
continue
part1 = t_G.nodes[x_node][PKEY][a]
part2 = t_G.nodes[i_node][PKEY][b]
part, value = _concatenate_or_merge(part1, part2, x_node, i_node, j)
if j not in bp_buffer or bp_buffer[j][1] < value:
# we annotate in the buffer the best partition for j
bp_buffer[j] = part, value
# we also keep track of the overall best partition
if best_value <= value:
best_value = value
best_partition = part
# as illustrated in Lukes, once we finished a child, we can
# discharge the partitions we found into the graph
# (the key phrase is make all x == x')
# so that they are used by the subsequent children
for w, (best_part_for_vl, vl) in bp_buffer.items():
t_G.nodes[x_node][PKEY][w] = best_part_for_vl
bp_buffer.clear()
# the absolute best partition for this node
# across all weights has to be stored at 0
t_G.nodes[x_node][PKEY][0] = best_partition
t_G.remove_nodes_from(x_descendants)
if x_node == root:
# the 0-labeled partition of root
# is the optimal one for the whole tree
return t_G.nodes[root][PKEY][0]
|