process_mining / model.py
linpershey's picture
adjust transparency
0257d64
import os
from typing import List, Optional, Tuple, Any
from collections import OrderedDict
import pandas as pd
from loguru import logger
import pm4py
import plotly.graph_objects as go
import networkx as nx
import matplotlib.pyplot as plt
from PIL import Image
from pydantic import BaseModel
class ProcessMap(BaseModel):
    # Pydantic model bundling a process model with its optional metadata.
    # NOTE(review): field meanings below are read off the field names only;
    # confirm against the code that instantiates this model.

    # The process model object; any type is accepted.
    net: Any
    # List of start activities, or None.
    start_activities: Optional[List]
    # List of end activities, or None.
    end_activities: Optional[List]
    # Image of the model (any type), or None.
    img: Optional[Any]
def dfg2networkx(dfg, start, end):
    """Dfg to networkx

    Two artificial nodes, '#Start#' and '#End#', are added to the graph:
    '#Start#' gets an edge to every start activity and every end activity
    gets an edge to '#End#'. An activity may be both a start and an end
    activity; it is then linked to both artificial nodes.

    Argument
        dfg: iterable of directly-follows edges; each edge is indexed as
            edge[0] (source activity) and edge[1] (target activity). A dict
            keyed by such pairs works too, since only its keys are iterated.
        start: iterable of start activities (e.g. a dict keyed by activity)
        end: iterable of end activities (e.g. a dict keyed by activity)
    Return
        nx: networkx DiGraph object
    Raise
        ValueError: if a start or end activity has the same name as one of
            the artificial nodes
    """
    PROCESS_START = '#Start#'
    PROCESS_END = '#End#'
    reserved = (PROCESS_START, PROCESS_END)

    # Materialise the inputs once so one-shot iterables are supported too.
    start_activities = list(start)
    end_activities = list(end)
    transitions = [(edge[0], edge[1]) for edge in dfg]

    # A dict serves as an insertion-ordered set, keeping node order
    # deterministic: artificial nodes, start activities, end activities,
    # then remaining activities in order of first appearance in the dfg.
    nodes = dict.fromkeys(reserved)
    for activity in (*start_activities, *end_activities):
        # Only a clash with the artificial node names is an error; the same
        # activity appearing in both `start` and `end` is legitimate.
        if activity in reserved:
            raise ValueError(f"#ERROR: {activity} exists")
        nodes[activity] = None
    for source, target in transitions:
        nodes[source] = None
        nodes[target] = None

    edges = [(PROCESS_START, activity) for activity in start_activities]
    edges.extend((activity, PROCESS_END) for activity in end_activities)
    edges.extend(transitions)

    nx_graph = nx.DiGraph()
    nx_graph.add_nodes_from(nodes)
    nx_graph.add_edges_from(edges)
    return nx_graph
def discover_process_map_variants(df, top_k: int = 0, type: str = 'dfg'):
    """Discover and display a DFG from a raw event log, optionally limited to the top variants

    Argument
        df: a pandas dataframe with 'case_id', 'activity' and 'timestamp' columns
        top_k: when greater than 0, only the top k variants are kept before discovery
        type: not used by this function; a DFG is always discovered
    Return
        dfg, start_activities, end_activities
    """
    log = pm4py.format_dataframe(
        df,
        case_id='case_id',
        activity_key='activity',
        timestamp_key='timestamp',
    )
    # A non-positive top_k means "no variant filtering".
    log = pm4py.filter_variants_top_k(log, k=top_k) if top_k > 0 else log

    graph, starts, ends = pm4py.discover_dfg(log)
    # Side effect: opens the pm4py DFG viewer.
    pm4py.view_dfg(graph, start_activities=starts, end_activities=ends)
    return graph, starts, ends
def discover_process_map_activities_connections(df, activity_rank: int = 0, connection_rank: int = 0, state: Optional[dict] = None, type: str = 'dfg'):
    """Discover and display a DFG from a raw event log, filtered by ranked connections

    Argument
        df: a pandas dataframe with 'case_id', 'activity' and 'timestamp' columns
        activity_rank: accepted but not used; activity filtering is not implemented
        connection_rank: when greater than 0, the ranked-connection slice is taken
            (see the NOTE in the body: it currently keeps every connection)
        state: optional dict; accepted for interface compatibility and not consulted
        type: not used by this function; a DFG is always discovered
    Return
        dfg, start_activities, end_activities
    """
    event_log = pm4py.format_dataframe(df, case_id='case_id', activity_key='activity', timestamp_key='timestamp')
    full_dfg, _, _ = pm4py.discover_dfg(event_log)

    # Connections ordered by their DFG value, largest first.
    ranked_connections = [
        connection
        for connection, _value in sorted(full_dfg.items(), key=lambda item: item[1], reverse=True)
    ]

    if connection_rank > 0:
        # NOTE(review): the upper bound is connection_rank + len(ranked_connections),
        # which is always past the end of the list, so every connection is kept.
        # The intended bound is unclear: possibly `connection_rank` alone, or
        # `connection_rank + len(state['top_variant_connections'])`, a value the
        # previous code read from `state` but never used. Behaviour is left
        # unchanged until that is confirmed.
        filtered_connections = ranked_connections[:connection_rank + len(ranked_connections)]
    else:
        filtered_connections = ranked_connections

    event_log = pm4py.filter_directly_follows_relation(event_log, relations=filtered_connections)
    dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)
    # Side effect: opens the pm4py DFG viewer.
    pm4py.view_dfg(dfg, start_activities=start_activities, end_activities=end_activities)
    return dfg, start_activities, end_activities
def discover_process_map(df: pd.DataFrame, type: str = 'dfg'):
    """Discover and display a process model from a raw event log

    Argument
        df: a pandas dataframe with 'case_id', 'activity' and 'timestamp' columns
        type: 'dfg', 'petrinet' or 'bpmn'
    Return
        type 'dfg': dfg, start_activities, end_activities
        type 'petrinet': net, img (PIL image opened from 'output/petri_net.png')
        type 'bpmn': net, img (PIL image opened from 'output/bpmn.png')
    Raise
        ValueError: if type is not one of the supported values
    """
    event_log = pm4py.format_dataframe(df, case_id='case_id', activity_key='activity', timestamp_key='timestamp')

    if type == 'dfg':
        dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)
        pm4py.view_dfg(dfg, start_activities=start_activities, end_activities=end_activities)
        return dfg, start_activities, end_activities

    if type == 'petrinet':
        net, im, fm = pm4py.discover_petri_net_inductive(event_log)
        pm4py.view_petri_net(petri_net=net, initial_marking=im, final_marking=fm)
        file_path = 'output/petri_net.png'
        # The output directory may not exist yet (e.g. on a fresh checkout).
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        pm4py.save_vis_petri_net(net, im, fm, file_path)
        img = Image.open(file_path)
        return net, img

    if type == 'bpmn':
        net = pm4py.discover_bpmn_inductive(event_log)
        pm4py.view_bpmn(net, format='png')
        file_path = 'output/bpmn.png'
        # The output directory may not exist yet (e.g. on a fresh checkout).
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        pm4py.save_vis_bpmn(net, file_path)
        img = Image.open(file_path)
        return net, img

    # ValueError is a subclass of Exception, so callers catching Exception still work.
    raise ValueError(f"Invalid type: {type}")
def view_networkx(nx_graph, layout):
    """Draw a networkx graph as a plotly figure

    Nodes are drawn as labelled markers, edges as grey line segments, and
    each edge additionally gets an arrow annotation pointing from its source
    to its target.

    Argument
        nx_graph: networkx graph whose nodes and edges are drawn
        layout: mapping from node to its (x, y) position
    Return
        graph object (plotly go.Figure) with axis tick labels hidden
    """
    node_names = list(nx_graph.nodes)

    # Create node scatter plot
    node_trace = go.Scatter(
        x=[layout[n][0] for n in node_names],
        y=[layout[n][1] for n in node_names],
        text=node_names,
        mode='markers+text',
        hovertext=node_names,
        textposition='top center',
        marker=dict(size=20, color='LightSkyBlue', line=dict(width=2), opacity=0.5)
    )

    # Collect edge coordinates in plain lists and build the trace once;
    # appending to the trace's x/y per edge re-copies the whole tuple each time.
    edge_x = []
    edge_y = []
    # Add arrows for directed edges
    annotations = []
    for edge in nx_graph.edges:
        x0, y0 = layout[edge[0]]
        x1, y1 = layout[edge[1]]
        # None separates consecutive segments so they are not joined together.
        edge_x.extend((x0, x1, None))
        edge_y.extend((y0, y1, None))
        # Arrow from the source position (ax, ay) to the target position (x, y)
        annotations.append(
            dict(
                ax=x0,
                ay=y0,
                axref='x',
                ayref='y',
                x=x1,
                y=y1,
                xref='x',
                yref='y',
                showarrow=True,
                arrowhead=2,
                arrowsize=1,
                arrowwidth=5,  # widen the arrow
                arrowcolor='rgba(128, 128, 128, 0.5)'
            )
        )

    # Create edge lines
    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=1.5, color='#888'),
        hoverinfo='none',
        mode='lines'
    )

    # Draw the figure (edges first so node markers are drawn on top of them)
    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            showlegend=False,
            hovermode='closest',
            margin=dict(b=0, l=0, r=0, t=0),
            annotations=annotations,
            xaxis=dict(showgrid=False, zeroline=False),
            yaxis=dict(showgrid=False, zeroline=False)
        )
    )
    fig = fig.update_xaxes(showticklabels=False)
    fig = fig.update_yaxes(showticklabels=False)
    return fig
def view_process_map(nx_graph, process_type: str = 'dfg', layout_type: str = 'sfdp'):
    """Lay out a networkx graph with graphviz and draw it as a plotly figure

    Argument
        nx_graph: networkx graph to draw
        process_type: not used by this function
        layout_type: graphviz program name passed as `prog` to the layout call
    Return
        the figure returned by view_networkx
    """
    # NOTE: pinning '#Start#' to the (min x, min y) corner and '#End#' to the
    # (max x, max y) corner of the layout was tried here and is disabled.
    positions = nx.nx_agraph.graphviz_layout(nx_graph, prog=layout_type)
    return view_networkx(nx_graph, positions)