# Page-scrape residue (hosting status banner: "Spaces: Running"); not part of the module.
'''
    This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

    PM4Py is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PM4Py is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PM4Py.  If not, see <https://www.gnu.org/licenses/>.
'''
from abc import ABC | |
from collections import Counter | |
from typing import List, Optional, Collection, Any, Tuple, Generic, Dict | |
from pm4py.util import nx_utils | |
from pm4py.algo.discovery.inductive.cuts.abc import Cut, T | |
from pm4py.algo.discovery.inductive.dtypes.im_dfg import InductiveDFG | |
from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL, IMDataStructureDFG | |
from pm4py.objects.dfg import util as dfu | |
from pm4py.objects.dfg.obj import DFG | |
from pm4py.objects.process_tree.obj import Operator, ProcessTree | |
from pm4py.util.compression.dtypes import UVCL | |
class LoopCut(Cut[T], ABC, Generic[T]):
    """Loop-cut detection on the directly-follows graph (DFG) of ``obj``.

    All methods take the class as first argument and call their siblings
    through ``cls``; they are therefore declared as classmethods so that they
    can be invoked directly on the class.
    """

    @classmethod
    def operator(cls, parameters: Optional[Dict[str, Any]] = None) -> ProcessTree:
        """Return a new process-tree node whose operator is ``Operator.LOOP``."""
        return ProcessTree(operator=Operator.LOOP)

    @classmethod
    def holds(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[List[Collection[Any]]]:
        """
        This method finds a loop cut in the dfg.
        Implementation follows function LoopCut on page 190 of
        "Robust Process Mining with Guarantees" by Sander J.J. Leemans (ISBN: 978-90-386-4257-4)

        Basic Steps:
        1. merge all start and end activities in one group ('do' group)
        2. remove start/end activities from the dfg
        3. detect connected components in (undirected representative) of the reduced graph
        4. check if each component meets the start/end criteria of the loop cut definition (merge with the 'do' group if not)
        5. return the cut if at least two groups remain

        Returns the list of groups (the 'do' group first), or ``None`` when the
        DFG has no edges or fewer than two non-empty groups remain.
        """
        dfg = obj.dfg
        if len(dfg.graph) == 0:
            return None

        start_activities = set(dfg.start_activities.keys())
        end_activities = set(dfg.end_activities.keys())

        # groups[0] is the 'do' part; every further entry is a candidate 'redo' part
        groups = [start_activities.union(end_activities)]
        groups.extend(
            set(component.nodes)
            for component in cls._compute_connected_components(dfg, start_activities, end_activities)
        )

        groups = cls._exclude_sets_non_reachable_from_start(dfg, start_activities, end_activities, groups)
        groups = cls._exclude_sets_no_reachable_from_end(dfg, start_activities, end_activities, groups)
        groups = cls._check_start_completeness(dfg, start_activities, end_activities, groups)
        groups = cls._check_end_completeness(dfg, start_activities, end_activities, groups)

        groups = [g for g in groups if len(g) > 0]
        return groups if len(groups) > 1 else None

    @classmethod
    def _check_start_completeness(cls, dfg: DFG, start_activities: Collection[Any], end_activities: Collection[Any],
                                  groups: List[Collection[Any]], parameters: Optional[Dict[str, Any]] = None) -> List[Collection[Any]]:
        """Merge into ``groups[0]`` every later group that is not start-complete.

        A group is merged when one of its activities has an edge to some start
        activity but lacks an edge to at least one start activity. ``groups``
        is modified in place and returned.
        """
        i = 1
        while i < len(groups):
            incomplete = any(
                any(x == a and b in start_activities for (x, b) in dfg.graph)
                and any((a, s) not in dfg.graph for s in start_activities)
                for a in groups[i]
            )
            if incomplete:
                groups[0] = set(groups[0]).union(groups[i])
                # do not advance: the next group has shifted into position i
                del groups[i]
            else:
                i += 1
        return groups

    @classmethod
    def _check_end_completeness(cls, dfg: DFG, start_activities: Collection[Any], end_activities: Collection[Any],
                                groups: List[Collection[Any]], parameters: Optional[Dict[str, Any]] = None) -> List[Collection[Any]]:
        """Merge into ``groups[0]`` every later group that is not end-complete.

        A group is merged when one of its activities is the target of an edge
        from some end activity but is not the target of an edge from every end
        activity. ``groups`` is modified in place and returned.
        """
        i = 1
        while i < len(groups):
            incomplete = any(
                any(x == a and b in end_activities for (b, x) in dfg.graph)
                and any((e, a) not in dfg.graph for e in end_activities)
                for a in groups[i]
            )
            if incomplete:
                groups[0] = set(groups[0]).union(groups[i])
                # do not advance: the next group has shifted into position i
                del groups[i]
            else:
                i += 1
        return groups

    @staticmethod
    def _merge_groups_containing(groups: List[Collection[Any]], a: Any, b: Any) -> List[Collection[Any]]:
        """Return a new group list in which the groups of ``a`` and ``b`` are fused.

        The fused group is placed at index 0 (the 'do' position); all other
        groups keep their relative order.
        """
        group_a, group_b = None, None
        for group in groups:
            group_a = group if a in group else group_a
            group_b = group if b in group else group_b
        merged = [group for group in groups if group != group_a and group != group_b]
        merged.insert(0, group_a.union(group_b))
        return merged

    @classmethod
    def _exclude_sets_non_reachable_from_start(cls, dfg: DFG, start_activities: Collection[Any],
                                               end_activities: Collection[Any],
                                               groups: List[Collection[Any]], parameters: Optional[Dict[str, Any]] = None) -> List[Collection[Any]]:
        """Fuse into the 'do' part every group entered from a pure start activity.

        For each activity that is a start but not an end activity, the group of
        every direct successor is merged with the group of that activity.
        Returns the resulting (new) group list.
        """
        for a in set(start_activities).difference(set(end_activities)):
            for (x, b) in dfg.graph:
                if x == a:
                    # we are always merging on the do-part
                    groups = cls._merge_groups_containing(groups, a, b)
        return groups

    @classmethod
    def _exclude_sets_no_reachable_from_end(cls, dfg: DFG, start_activities: Collection[Any],
                                            end_activities: Collection[Any],
                                            groups: List[Collection[Any]], parameters: Optional[Dict[str, Any]] = None) -> List[Collection[Any]]:
        """Fuse into the 'do' part every group that leads into a pure end activity.

        For each activity that is an end but not a start activity, the group of
        every direct predecessor is merged with the group of that activity.
        Returns the resulting (new) group list.
        """
        for b in set(end_activities).difference(start_activities):
            for (a, x) in dfg.graph:
                if x == b:
                    groups = cls._merge_groups_containing(groups, a, b)
        return groups

    @classmethod
    def _compute_connected_components(cls, dfg: DFG, start_activities: Collection[Any],
                                      end_activities: Collection[Any], parameters: Optional[Dict[str, Any]] = None):
        """Return the connected components of the DFG without start/end activities.

        Edges touching a start or end activity and the start/end nodes
        themselves are removed; connectivity is then computed on the undirected
        view, and each component is returned as a copy of the corresponding
        subgraph of the reduced directed graph.
        """
        nxd = dfu.as_nx_graph(dfg)
        boundary = set(start_activities).union(end_activities)

        for (a, b) in dfg.graph:
            if a in boundary or b in boundary:
                nxd.remove_edge(a, b)
        for a in start_activities:
            if nxd.has_node(a):
                nxd.remove_node(a)
        for a in end_activities:
            if nxd.has_node(a):
                nxd.remove_node(a)

        nxu = nxd.to_undirected()
        return [nxd.subgraph(c).copy() for c in nx_utils.connected_components(nxu)]
class LoopCutUVCL(LoopCut[IMDataStructureUVCL]):
    """Loop cut whose projection works on a log of trace variants with counts."""

    @classmethod
    def project(cls, obj: IMDataStructureUVCL, groups: List[Collection[Any]], parameters: Optional[Dict[str, Any]] = None) -> List[IMDataStructureUVCL]:
        """Split every trace of ``obj.data_structure`` into do and redo sub-traces.

        ``groups[0]`` is the do part, ``groups[1:]`` are the redo parts. Each
        trace is scanned left to right; a maximal run of do activities becomes
        a do sub-trace and a maximal run of redo activities becomes a redo
        sub-trace, each counted with the multiplicity of the original trace.
        The do sub-trace that is open at the end of a trace is always recorded,
        even when it is empty.

        Returns one ``IMDataStructureUVCL`` per group, in group order.
        """
        do = groups[0]
        redo = groups[1:]
        # set for constant-time membership tests in the inner loop
        redo_activities = {act for group in redo for act in group}

        do_log = Counter()
        redo_logs = [Counter() for _ in redo]
        log = obj.data_structure

        for t in log:
            cardinality = log[t]
            do_trace = tuple()
            redo_trace = tuple()
            for e in t:
                if e in do:
                    do_trace = do_trace + (e,)
                    if len(redo_trace) > 0:
                        # a do activity closes the redo run that preceded it
                        redo_logs = cls._append_trace_to_redo_log(redo_trace, redo_logs, redo, cardinality)
                        redo_trace = tuple()
                elif e in redo_activities:
                    redo_trace = redo_trace + (e,)
                    if len(do_trace) > 0:
                        # a redo activity closes the do run that preceded it
                        do_log.update({do_trace: cardinality})
                        do_trace = tuple()
            if len(redo_trace) > 0:
                # trace ended inside a redo run: flush it with the trace's
                # multiplicity (this argument was previously missing, which
                # raised a TypeError)
                redo_logs = cls._append_trace_to_redo_log(redo_trace, redo_logs, redo, cardinality)
            do_log.update({do_trace: cardinality})

        logs = [do_log]
        logs.extend(redo_logs)
        return [IMDataStructureUVCL(l) for l in logs]

    @classmethod
    def _append_trace_to_redo_log(cls, redo_trace: Tuple, redo_logs: List[UVCL], redo_groups: List[Collection[Any]],
                                  cardinality, parameters: Optional[Dict[str, Any]] = None) -> \
            List[UVCL]:
        """Add ``redo_trace`` to the log of the redo group it overlaps most.

        The target is the group sharing the largest number of distinct
        activities with the trace; ties go to the group with the highest
        index. ``redo_logs`` is updated in place by ``cardinality`` and
        returned.
        """
        activities = set(redo_trace)
        best = max(
            range(len(redo_groups)),
            key=lambda i: (len(activities.intersection(redo_groups[i])), i),
        )
        redo_logs[best].update({redo_trace: cardinality})
        return redo_logs
class LoopCutDFG(LoopCut[IMDataStructureDFG]):
    """Loop cut whose projection works directly on the directly-follows graph."""

    @classmethod
    def project(cls, obj: IMDataStructureDFG, groups: List[Collection[Any]], parameters: Optional[Dict[str, Any]] = None) -> List[IMDataStructureDFG]:
        """Build one sub-DFG per group, each paired with a skip flag.

        Every sub-DFG keeps the edges of ``obj.dfg`` whose two endpoints lie in
        the group. For the first (do) group the original start/end activities
        that belong to the group are kept with their counts. For every other
        (redo) group each activity is registered as start and end activity
        with count 1.

        Skip flags:
        - index 0 is set when some start activity is not in ``groups[0]``;
        - index 1 is set when some end activity is not in ``groups[0]`` or
          when the DFG has an edge from an end activity to a start activity;
        - further redo groups are never flagged.

        Returns one ``IMDataStructureDFG`` per group, in group order.
        """
        dfg = obj.dfg
        dfgs = []
        # One flag per group; at least two slots so index 1 is always writable.
        # (A fixed two-element list raised IndexError for three or more groups.)
        skippable = [False] * max(len(groups), 2)

        # An end -> start edge does not depend on the group being projected,
        # so it is evaluated once instead of once per group.
        if groups and any(b in dfg.start_activities and a in dfg.end_activities for (a, b) in dfg.graph):
            skippable[1] = True

        for gind, g in enumerate(groups):
            dfn = DFG()
            for (a, b) in dfg.graph:
                if a in g and b in g:
                    dfn.graph[(a, b)] = dfg.graph[(a, b)]
            if gind == 0:
                for a in dfg.start_activities:
                    if a in g:
                        dfn.start_activities[a] = dfg.start_activities[a]
                    else:
                        skippable[0] = True
                for a in dfg.end_activities:
                    if a in g:
                        dfn.end_activities[a] = dfg.end_activities[a]
                    else:
                        skippable[1] = True
            else:
                # every redo part (not only the first) needs entry/exit points
                for a in g:
                    dfn.start_activities[a] = 1
                    dfn.end_activities[a] = 1
            dfgs.append(dfn)

        return [IMDataStructureDFG(InductiveDFG(dfg=sub, skip=skip)) for sub, skip in zip(dfgs, skippable)]