kota committed on
Commit
1089f07
·
0 Parent(s):

initial commit

Browse files
Files changed (11) hide show
  1. .gitattributes +37 -0
  2. .gitignore +170 -0
  3. .gitmodules +0 -0
  4. README.md +11 -0
  5. app.py +106 -0
  6. gapp.py +160 -0
  7. model.py +216 -0
  8. packages.txt +2 -0
  9. process_map.ipynb +0 -0
  10. requirements.txt +176 -0
  11. sapp.py +24 -0
.gitattributes ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.sqlite filter=lfs diff=lfs merge=lfs -text
37
+ *.xes.gz filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
130
+
131
+ # JetBrains
132
+ .idea
133
+
134
+ *.db
135
+
136
+ .DS_Store
137
+
138
+ vectorstore.pkl
139
+ langchain.readthedocs.io/
140
+
141
+ __pycache__/
142
+ .idea/
143
+ .ipynb_checkpoints/
144
+ *.bin
145
+ *.exe
146
+ *.msi
147
+ output/*
148
+ trained_models/*
149
+ !trained_models/.gitkeep
150
+ pretrained_models/*
151
+ !pretrained_models/.gitkeep
152
+ !pretrained_models/embedding/
153
+ pretrained_models/embedding/*
154
+ !pretrained_models/embedding/.gitkeep
155
+ runs
156
+ numpy_files/
157
+ #log*
158
+ #tmp*
159
+ data/
160
+ !data/.gitkeep
161
+ output/
162
+ !output/.gitkeep
163
+ .env
164
+ *.env
165
+
166
+ clients/
167
+ !clients/.gitkeep
168
+
169
+ creds/
170
+
.gitmodules ADDED
File without changes
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Process Mining
3
+ emoji: 🤗
4
+ colorFrom: yellow
5
+ colorTo: green
6
+ python_version: '3.10'
7
+ sdk: gradio
8
+ sdk_version: 4.37.2
9
+ app_file: sapp.py
10
+ pinned: false
11
+ ---
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dash
2
+ from dash import dcc, html
3
+ from dash.dependencies import Input, Output
4
+ import plotly.graph_objects as go
5
+ import networkx as nx
6
+
7
# Demo graph: nine numbered nodes joined by directed edges (node 6 also carries a self-loop).
G = nx.DiGraph()
G.add_nodes_from([1, 2, 3, 4, 5, 6, 7, 8, 9])
G.add_edges_from([
    (1, 2), (1, 3), (2, 4), (3, 5), (1, 6), (4, 7), (4, 8),
    (5, 7), (5, 8), (7, 9), (8, 9), (6, 9), (6, 6),
])

# Dash application: a dropdown that picks the node to hide, above the graph figure.
app = dash.Dash(__name__)

app.layout = html.Div([
    dcc.Dropdown(
        id='node-dropdown',
        options=[{'label': f'Node {i}', 'value': i} for i in G.nodes],
        value=None,
        placeholder="Select a node to filter",
    ),
    dcc.Graph(id='network-graph'),
])
28
+
29
@app.callback(
    Output('network-graph', 'figure'),
    Input('node-dropdown', 'value')
)
def update_graph(selected_node):
    """Redraw the network figure with the selected node removed.

    Args:
        selected_node: Current value of the ``node-dropdown`` component, or
            ``None`` when nothing is selected (the full graph is drawn).

    Returns:
        A Plotly figure with one labelled marker per node and one line plus
        arrow annotation per directed edge.
    """
    nodes_to_filter = [] if selected_node is None else [selected_node]
    filtered_graph = filter_nodes(G, nodes_to_filter)

    # The spring layout is recomputed on every callback invocation.
    pos = nx.spring_layout(filtered_graph)

    node_ids = list(filtered_graph.nodes)
    node_trace = go.Scatter(
        x=[pos[n][0] for n in node_ids],
        y=[pos[n][1] for n in node_ids],
        text=node_ids,
        mode='markers+text',
        textposition='top center',
        marker=dict(size=20, color='LightSkyBlue', line=dict(width=2))
    )

    # Collect coordinates in plain lists and hand them to Plotly once;
    # extending the trace property per edge re-validates and copies the whole
    # sequence on every iteration.
    edge_x = []
    edge_y = []
    annotations = []
    for source, target in filtered_graph.edges:
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        # ``None`` breaks the line so consecutive edges are not joined.
        edge_x.extend((x0, x1, None))
        edge_y.extend((y0, y1, None))

        # Arrow annotation pointing from the source to the target node.
        annotations.append(
            dict(
                ax=x0,
                ay=y0,
                axref='x',
                ayref='y',
                x=x1,
                y=y1,
                xref='x',
                yref='y',
                showarrow=True,
                arrowhead=2,
                arrowsize=1,
                arrowwidth=2,
                arrowcolor='Gray'
            )
        )

    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=1.5, color='Gray'),
        hoverinfo='none',
        mode='lines'
    )

    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            showlegend=False,
            hovermode='closest',
            margin=dict(b=0, l=0, r=0, t=0),
            annotations=annotations,
            xaxis=dict(showgrid=False, zeroline=False),
            yaxis=dict(showgrid=False, zeroline=False)
        )
    )

    return fig
96
+
97
def filter_nodes(graph, nodes_to_remove):
    """Return a copy of ``graph`` with the listed nodes removed.

    The input graph is not modified; entries of ``nodes_to_remove`` that are
    not present in the copy are skipped.
    """
    pruned = graph.copy()
    for candidate in nodes_to_remove:
        if candidate not in pruned:
            continue
        pruned.remove_node(candidate)
    return pruned
103
+
104
if __name__ == '__main__':
    import os

    # The interactive debugger must not be reachable on a server bound to
    # 0.0.0.0 by default, so debug mode is opt-in via DASH_DEBUG=1.
    debug_enabled = os.environ.get('DASH_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run_server(debug=debug_enabled, port=8050, host='0.0.0.0')
106
+
gapp.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from io import StringIO
4
+ from pathlib import Path
5
+ from collections import OrderedDict
6
+
7
+ import plotly.express as px
8
+ import gradio as gr
9
+ import pandas as pd
10
+ from dotenv import load_dotenv
11
+ # from PIL import Image
12
+ import matplotlib.pyplot as plt
13
+ # import cv2
14
+ # import numpy as np
15
+ import plotly.graph_objects as go
16
+ import networkx as nx
17
+
18
+ from model import dfg2networkx, discover_process_map, discover_process_map_activities_connections, discover_process_map_variants, view_process_map
19
+
20
+ load_dotenv()
21
+
22
+ logger = logging.getLogger(__name__)
23
+ logger.setLevel(logging.DEBUG)
24
+
25
+
26
def get_data(temp_file, state: dict):
    """Load an uploaded semicolon-separated event log and cache it in ``state``.

    Args:
        temp_file: Either a filesystem path (``str``) or an object exposing
            the path through its ``name`` attribute.
        state: Session dictionary; the parsed frame is stored under ``'df'``.

    Returns:
        ``(df, state)`` where ``df`` is the parsed DataFrame with ``case_id``
        read as strings and ``timestamp`` converted to datetimes.
    """
    csv_path = temp_file if isinstance(temp_file, str) else temp_file.name
    df = pd.read_csv(csv_path, sep=';', dtype={'case_id': str}, parse_dates=['timestamp'])
    # Replace the column outright: ``df.loc[:, col] = ...`` writes in place and
    # may keep the existing dtype instead of switching to datetime64.
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    state['df'] = df
    return df, state
40
+
41
+
42
def get_stats(state: dict):
    """Compute summary statistics and per-case charts for the cached event log.

    Args:
        state: Session dictionary; the event log is read from ``state['df']``
            and must provide ``case_id``, ``activity`` and ``timestamp`` columns.

    Returns:
        ``(summary, case_stats_plot, lead_time_plot, avg_duration_plot, state)``:
        a summary DataFrame, three ``gr.BarPlot`` updates and the unchanged state.
    """
    df = state.get('df', pd.DataFrame()).copy()
    summary = pd.DataFrame({
        "metric": ["資料筆數", "Case 數量", "Activity 數量", "起始時間", "結束時間"],
        "value": [
            df.shape[0],
            df['case_id'].nunique(),
            df['activity'].nunique(),
            df['timestamp'].min(),
            df['timestamp'].max(),
        ],
    })

    per_case = df.groupby('case_id')

    # Number of events recorded for each case.
    case_stats = per_case.agg(count=('activity', 'size')).reset_index()
    logger.debug("case stats: %s", case_stats)

    # First/last timestamp and event count per case drive both duration charts.
    spans = per_case['timestamp'].agg(['min', 'max', 'count'])
    span_seconds = (spans['max'] - spans['min']).dt.total_seconds()

    # Lead time: whole hours between the first and last event of a case.
    case_lead_time = pd.DataFrame({
        'case_id': spans.index,
        'duration': (span_seconds // 3600).to_numpy(),
    })

    # Average duration: hours per transition between consecutive events.
    # Previously this repeated the lead-time value, so both charts were
    # identical. Single-event cases have no transition and report 0.
    transitions = (spans['count'] - 1).clip(lower=1)
    case_avg_duration = pd.DataFrame({
        'case_id': spans.index,
        'avg_duration': (span_seconds / 3600 / transitions).to_numpy(),
    })

    logger.debug("case lead time: %s", case_lead_time)
    return (
        summary,
        gr.BarPlot(case_stats, x="case_id", y="count", title="Case Stats", tooltip=["case_id", "count"], width=None),
        gr.BarPlot(case_lead_time, x="case_id", y="duration", title="Case Lead Time", tooltip=["case_id", "duration"], width=None),
        gr.BarPlot(case_avg_duration, x="case_id", y="avg_duration", title="Case Average Duration", tooltip=["case_id", "avg_duration"], width=None),
        state
    )
73
+
74
+
75
def get_process_map(state: dict | None = None):
    """Discover a Petri net from the cached event log and return its image.

    Args:
        state: Session dictionary holding the event log under ``'df'``.
            A fresh dictionary is used when omitted, so calls never share
            a default object.

    Returns:
        ``(img, state)`` where ``img`` is the image returned by
        ``discover_process_map`` for ``type='petrinet'``.
    """
    if state is None:
        state = {}
    df = state.get('df', pd.DataFrame()).copy()
    _net, img = discover_process_map(df, type='petrinet')
    return img, state
79
+
80
def get_process_map_variants(top_k: int = 1, state: dict | None = None):
    """Build the process-map chart for the ``top_k`` variants.

    Args:
        top_k: Number of variants passed to ``discover_process_map_variants``.
        state: Session dictionary holding the event log under ``'df'``. It is
            updated with ``'top_variant_connections'`` (connections sorted by
            descending frequency) and, the first time ``top_k == 1`` is
            requested, with ``'top_variant'``. A fresh dictionary is created
            when omitted so the written keys never leak between calls.

    Returns:
        ``(chart, state)``.
    """
    if state is None:
        state = {}
    df = state.get('df', pd.DataFrame()).copy()
    dfg, start_activities, end_activities = discover_process_map_variants(df, top_k, type='dfg')

    state['top_variant_connections'] = OrderedDict(
        sorted(dfg.items(), key=lambda item: item[1], reverse=True)
    )
    if 'top_variant' not in state and top_k == 1:
        state['top_variant'] = {
            'dfg': dfg,
            'start_activities': start_activities,
            'end_activities': end_activities,
        }

    nx_graph = dfg2networkx(dfg, start_activities, end_activities)
    chart = view_process_map(nx_graph, process_type='dfg', layout_type='sfdp')
    return chart, state
92
+
93
+
94
def get_process_map_activities_connections(activity_rank: int = 0, connection_rank: int = 0, state: dict | None = None):
    """Build the process-map chart filtered by activity/connection rank.

    Args:
        activity_rank: Forwarded to ``discover_process_map_activities_connections``.
        connection_rank: Forwarded to ``discover_process_map_activities_connections``.
        state: Session dictionary holding the event log under ``'df'``; it is
            also forwarded to the discovery helper. A fresh dictionary is used
            when omitted so calls never share a default object.

    Returns:
        ``(chart, state)``.
    """
    if state is None:
        state = {}
    df = state.get('df', pd.DataFrame()).copy()
    dfg, start_activities, end_activities = discover_process_map_activities_connections(
        df, activity_rank=activity_rank, connection_rank=connection_rank, state=state
    )
    nx_graph = dfg2networkx(dfg, start_activities, end_activities)
    chart = view_process_map(nx_graph, process_type='dfg', layout_type='sfdp')
    return chart, state
102
+
103
+
104
+ ## --- block --- ##
105
css = """
h1 {
    text-align: center;
    display:block;
}
"""

# NOTE(review): the container nesting below is reconstructed from the
# statement order — confirm it matches the intended layout.
demo = gr.Blocks(css=css)
with demo:
    gr.Markdown("# 🌟 Process Discovery 🌟")
    # Per-session dictionary shared by every callback.
    state = gr.State(value={})

    with gr.Row():
        upl_btn = gr.UploadButton(label="Upload", file_types=['.csv'], file_count="single")
    with gr.Accordion('Data Preview'):
        df = gr.Dataframe()
    upl_btn.upload(fn=get_data, inputs=[upl_btn, state], outputs=[df, state])

    with gr.Row():
        with gr.Tab('Data Explorer'):
            de_btn = gr.Button("Get Stats")
            with gr.Row():
                summary = gr.Dataframe(label="Summary", interactive=False, height=300)
                chart1 = gr.BarPlot(label="Case Stats")
                chart2 = gr.BarPlot(label="Case Lead Time Stats")
                chart3 = gr.BarPlot(label="Case Average Activity Time Stats")
            de_btn.click(fn=get_stats, inputs=[state], outputs=[summary, chart1, chart2, chart3, state])

        with gr.Tab('Variant Explorer'):
            ve_btn = gr.Button("Get Variants")
            top_k_variant_selector = gr.Slider(0, 10, value=1, step=1, label="Top-K", info="選擇 Variant 數量(0: 全選)")
            pmchart = gr.Plot(label="Process Map")
            ve_btn.click(fn=get_process_map_variants, inputs=[top_k_variant_selector, state], outputs=[pmchart, state])

        with gr.Tab('Process Explorer'):
            pe_btn = gr.Button("Get Activities & Connections")
            with gr.Column():
                top_k_activity_selector = gr.Slider(0, 10, value=1, step=1, label="Activity", info="【pending】增減 Top Activity 數量(0: 全選)")
                top_k_connection_selector = gr.Slider(0, 10, value=1, step=1, label="Connection", info="增減 Top Connection 數量(0: 全選)")
            # ``pmchart`` is rebound here; the click handler below targets this second plot.
            pmchart = gr.Plot(label="Process Map")
            pe_btn.click(fn=get_process_map_activities_connections, inputs=[top_k_activity_selector, top_k_connection_selector, state], outputs=[pmchart, state])

        with gr.Tab('Process Model'):
            cc_btn = gr.Button("Get Process Model")
            img = gr.Image(label="Process Model")
            cc_btn.click(fn=get_process_map, inputs=[state], outputs=[img, state])


if __name__ == "__main__":
    # PORT may be unset (e.g. local runs); int(None) would raise TypeError,
    # so fall back to Gradio's own port selection in that case.
    configured_port = os.environ.get("PORT")
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(configured_port) if configured_port else None,
        # NOTE(review): both values are None when the variables are unset — confirm
        # the deployment always provides USER_NAME and PASSWORD.
        auth=(os.environ.get("USER_NAME"), os.environ.get("PASSWORD"))
    )
160
+
model.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Optional, Tuple, Any
3
+ from collections import OrderedDict
4
+
5
+ import pandas as pd
6
+ from loguru import logger
7
+ import pm4py
8
+ import plotly.graph_objects as go
9
+ import networkx as nx
10
+ import matplotlib.pyplot as plt
11
+ from PIL import Image
12
+ from pydantic import BaseModel
13
+
14
+
15
class ProcessMap(BaseModel):
    """Pydantic model bundling a discovered process model with its extras."""

    # Discovered model object; typed as Any, so any value is accepted.
    net: Any
    # Start activities as a list, or None.
    start_activities: Optional[List]
    # End activities as a list, or None.
    end_activities: Optional[List]
    # Rendered image of the model, or None.
    img: Optional[Any]
20
+
21
+
22
def dfg2networkx( dfg, start, end):
    """Convert a directly-follows graph into a networkx directed graph.

    Argument
        dfg: iterable of transitions; element ``[0]`` of each is the source
            activity and element ``[1]`` the target activity
        start: iterable of start activities
        end: iterable of end activities
    Return
        nx.DiGraph containing the artificial ``'#Start#'`` and ``'#End#'``
        nodes, every activity, an edge from ``'#Start#'`` to each start
        activity, an edge from each end activity to ``'#End#'`` and one edge
        per transition
    """
    PROCESS_START = '#Start#'
    PROCESS_END = '#End#'

    # A dict keeps first-seen order while de-duplicating. An activity that is
    # both a start and an end activity is therefore registered once instead of
    # aborting the conversion with an AssertionError.
    ordered_nodes = dict.fromkeys((PROCESS_START, PROCESS_END))
    for activity in start:
        ordered_nodes.setdefault(activity)
    for activity in end:
        ordered_nodes.setdefault(activity)
    for transition in dfg:
        ordered_nodes.setdefault(transition[0])
        ordered_nodes.setdefault(transition[1])

    edges = [(PROCESS_START, activity) for activity in start]
    edges.extend((activity, PROCESS_END) for activity in end)
    edges.extend((transition[0], transition[1]) for transition in dfg)

    nx_graph = nx.DiGraph()
    nx_graph.add_nodes_from(ordered_nodes)
    nx_graph.add_edges_from(edges)
    return nx_graph
69
+
70
+
71
def discover_process_map_variants( df, top_k: int = 0, type: str = 'dfg'):
    """Discover the directly-follows graph of the most frequent variants.

    Argument
        df: pandas dataframe with ``case_id``, ``activity`` and ``timestamp`` columns
        top_k: keep only the top-k variants; values <= 0 keep every variant
        type: accepted but not used by this function
    Return
        dfg, start_activities, end_activities
    """
    log = pm4py.format_dataframe(
        df,
        case_id='case_id',
        activity_key='activity',
        timestamp_key='timestamp',
    )
    if top_k > 0:
        log = pm4py.filter_variants_top_k(log, k=top_k)

    discovered = pm4py.discover_dfg(log)
    dfg, start_activities, end_activities = discovered
    # The graph is also displayed through pm4py's viewer as a side effect.
    pm4py.view_dfg(dfg, start_activities=start_activities, end_activities=end_activities)
    return dfg, start_activities, end_activities
86
+
87
+
88
def discover_process_map_activities_connections( df, activity_rank: int = 0, connection_rank: int = 0, state: dict = None, type: str = 'dfg'):
    """Discover a directly-follows graph restricted to the most frequent connections.

    Argument
        df: pandas dataframe with ``case_id``, ``activity`` and ``timestamp`` columns
        activity_rank: accepted but not applied yet
        connection_rank: number of connections to keep in addition to those
            stored under ``state['top_variant_connections']``; values <= 0
            keep every connection
        state: session dictionary; only ``'top_variant_connections'`` is read
        type: accepted but not used by this function
    Return
        dfg, start_activities, end_activities
    """
    if state is None:
        state = {}
    event_log = pm4py.format_dataframe( df, case_id='case_id', activity_key='activity', timestamp_key='timestamp')
    full_dfg, _, _ = pm4py.discover_dfg(event_log)
    # Connections ordered from most to least frequent.
    ranked_connections = sorted(full_dfg, key=full_dfg.get, reverse=True)

    if connection_rank > 0:
        # The cut-off is relative to the size of the top-variant baseline.
        # Using the length of the full ranking here made the slice cover
        # everything, so the rank had no effect.
        top_variant_connections = state.get('top_variant_connections', [])
        filtered_connections = ranked_connections[: connection_rank + len(top_variant_connections)]
    else:
        filtered_connections = ranked_connections

    event_log = pm4py.filter_directly_follows_relation( event_log, relations = filtered_connections)
    dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)
    # The graph is also displayed through pm4py's viewer as a side effect.
    pm4py.view_dfg(dfg, start_activities=start_activities, end_activities=end_activities)
    return dfg, start_activities, end_activities
112
+
113
+
114
def discover_process_map( df: pd.DataFrame, type: str = 'dfg'):
    """Discover a process model of the requested kind from a raw event log.

    Argument
        df: pandas dataframe with ``case_id``, ``activity`` and ``timestamp`` columns
        type: ``'dfg'``, ``'petrinet'`` or ``'bpmn'``
    Return
        ``'dfg'``: dfg, start_activities, end_activities
        ``'petrinet'`` / ``'bpmn'``: (model, PIL image of the rendering saved
        under ``output/``)
    Raise
        ValueError: ``type`` is not one of the supported kinds
    """
    event_log = pm4py.format_dataframe( df, case_id='case_id', activity_key='activity', timestamp_key='timestamp')
    if type == 'dfg':
        dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)
        pm4py.view_dfg(dfg, start_activities=start_activities, end_activities=end_activities)
        return dfg, start_activities, end_activities

    if type == 'petrinet':
        net, im, fm = pm4py.discover_petri_net_inductive(event_log)
        pm4py.view_petri_net( petri_net=net, initial_marking=im, final_marking=fm)
        file_path = 'output/petri_net.png'
        # Create the target directory first so saving cannot fail on a missing folder.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        pm4py.save_vis_petri_net( net, im, fm, file_path)
        return net, Image.open(file_path)

    if type == 'bpmn':
        net = pm4py.discover_bpmn_inductive(event_log)
        pm4py.view_bpmn(net, format='png')
        file_path = 'output/bpmn.png'
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        pm4py.save_vis_bpmn( net, file_path)
        return net, Image.open(file_path)

    # ValueError subclasses Exception, so existing ``except Exception`` callers still match.
    raise ValueError(f"Invalid type: {type}")
138
+
139
+
140
def view_networkx( nx_graph, layout):
    """Render a directed networkx graph as a Plotly figure.

    Argument
        nx_graph: graph whose nodes and edges are drawn
        layout: mapping from node to its ``(x, y)`` position
    Return
        Plotly figure with one labelled marker per node, one line and arrow
        annotation per edge, and axis tick labels hidden
    """
    node_ids = list(nx_graph.nodes)

    # Create node scatter plot
    node_trace = go.Scatter(
        x=[layout[n][0] for n in node_ids],
        y=[layout[n][1] for n in node_ids],
        text=node_ids,
        mode='markers+text',
        hovertext=node_ids,
        textposition='top center',
        marker=dict(size=5, color='LightSkyBlue', line=dict(width=2))
    )

    # Collect edge coordinates in plain lists and pass them to Plotly once;
    # extending the trace property per edge re-validates and copies the whole
    # sequence on every iteration.
    edge_x = []
    edge_y = []
    annotations = []
    for source, target in nx_graph.edges:
        x0, y0 = layout[source]
        x1, y1 = layout[target]
        # ``None`` breaks the line so consecutive edges are not joined.
        edge_x.extend((x0, x1, None))
        edge_y.extend((y0, y1, None))

        # Arrow annotation pointing from the source to the target node.
        annotations.append(
            dict(
                ax=x0,
                ay=y0,
                axref='x',
                ayref='y',
                x=x1,
                y=y1,
                xref='x',
                yref='y',
                showarrow=True,
                arrowhead=2,
                arrowsize=1,
                arrowwidth=2,
                arrowcolor='Gray'
            )
        )

    # Create edge lines
    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=1.5, color='#888'),
        hoverinfo='none',
        mode='lines'
    )

    # Draw the figure
    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            showlegend=False,
            hovermode='closest',
            margin=dict(b=0, l=0, r=0, t=0),
            annotations=annotations,
            xaxis=dict(showgrid=False, zeroline=False),
            yaxis=dict(showgrid=False, zeroline=False)
        )
    )
    fig = fig.update_xaxes(showticklabels=False)
    fig = fig.update_yaxes(showticklabels=False)
    return fig
209
+
210
+
211
def view_process_map( nx_graph, process_type: str = 'dfg', layout_type: str = 'sfdp'):
    """Lay out ``nx_graph`` with Graphviz and return the rendered figure.

    ``layout_type`` is passed to Graphviz as the layout program;
    ``process_type`` is accepted but not used by this function.
    """
    positions = nx.nx_agraph.graphviz_layout(nx_graph, prog=layout_type)
    return view_networkx(nx_graph, positions)
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ graphviz
2
+ graphviz-dev
process_map.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ altair==5.3.0
3
+ annotated-types==0.7.0
4
+ anyio==4.4.0
5
+ argon2-cffi==23.1.0
6
+ argon2-cffi-bindings==21.2.0
7
+ arrow==1.3.0
8
+ asttokens==2.4.1
9
+ async-lru==2.0.4
10
+ attrs==23.2.0
11
+ Babel==2.15.0
12
+ beautifulsoup4==4.12.3
13
+ bleach==6.1.0
14
+ blinker==1.8.2
15
+ certifi==2024.6.2
16
+ cffi==1.16.0
17
+ charset-normalizer==3.3.2
18
+ click==8.1.7
19
+ comm==0.2.2
20
+ contourpy==1.2.1
21
+ cvxopt==1.3.2
22
+ cycler==0.12.1
23
+ dash==2.17.1
24
+ dash-core-components==2.0.0
25
+ dash-html-components==2.0.0
26
+ dash-table==5.0.0
27
+ debugpy==1.8.1
28
+ decorator==5.1.1
29
+ defusedxml==0.7.1
30
+ deprecation==2.1.0
31
+ dnspython==2.6.1
32
+ docopt==0.6.2
33
+ email_validator==2.1.1
34
+ exceptiongroup==1.2.1
35
+ executing==2.0.1
36
+ extratools==0.8.2.1
37
+ fastapi==0.111.0
38
+ fastapi-cli==0.0.4
39
+ fastjsonschema==2.20.0
40
+ ffmpy==0.3.2
41
+ filelock==3.15.1
42
+ Flask==3.0.3
43
+ fonttools==4.53.0
44
+ fqdn==1.5.1
45
+ fsspec==2024.6.0
46
+ gradio==4.36.1
47
+ gradio_client==1.0.1
48
+ graphviz==0.20.3
49
+ h11==0.14.0
50
+ httpcore==1.0.5
51
+ httptools==0.6.1
52
+ httpx==0.27.0
53
+ huggingface-hub==0.23.4
54
+ idna==3.7
55
+ importlib_metadata==7.1.0
56
+ importlib_resources==6.4.0
57
+ intervaltree==3.1.0
58
+ ipykernel==6.29.4
59
+ ipython==8.25.0
60
+ isoduration==20.11.0
61
+ itsdangerous==2.2.0
62
+ jedi==0.19.1
63
+ Jinja2==3.1.4
64
+ joblib==1.4.2
65
+ json5==0.9.25
66
+ jsonpointer==3.0.0
67
+ jsonschema==4.22.0
68
+ jsonschema-specifications==2023.12.1
69
+ jupyter-events==0.10.0
70
+ jupyter-lsp==2.2.5
71
+ jupyter_client==8.6.2
72
+ jupyter_core==5.7.2
73
+ jupyter_server==2.14.1
74
+ jupyter_server_terminals==0.5.3
75
+ jupyterlab==4.2.2
76
+ jupyterlab_pygments==0.3.0
77
+ jupyterlab_server==2.27.2
78
+ kiwisolver==1.4.5
79
+ loguru==0.7.2
80
+ lxml==5.2.2
81
+ markdown-it-py==3.0.0
82
+ MarkupSafe==2.1.5
83
+ matplotlib==3.9.0
84
+ matplotlib-inline==0.1.7
85
+ mdurl==0.1.2
86
+ mistune==3.0.2
87
+ nbclient==0.10.0
88
+ nbconvert==7.16.4
89
+ nbformat==5.10.4
90
+ nest-asyncio==1.6.0
91
+ networkx==3.3
92
+ notebook==7.2.1
93
+ notebook_shim==0.2.4
94
+ numpy==1.26.4
95
+ orjson==3.10.5
96
+ overrides==7.7.0
97
+ packaging==24.1
98
+ pandas==2.2.2
99
+ pandocfilters==1.5.1
100
+ parso==0.8.4
101
+ pexpect==4.9.0
102
+ pillow==10.3.0
103
+ platformdirs==4.2.2
104
+ plotly==5.22.0
105
+ # pm4py==2.7.11.11
106
+ -e ./pm4py
107
+ prefixspan==0.5.2
108
+ prettytable==3.10.0
109
+ prometheus_client==0.20.0
110
+ prompt_toolkit==3.0.47
111
+ psutil
112
+ ptyprocess
113
+ pure-eval==0.2.2
114
+ pycparser==2.22
115
+ pydantic==2.7.4
116
+ pydantic_core==2.18.4
117
+ pydotplus==2.0.2
118
+ pydub==0.25.1
119
+ pyecharts==2.0.6
120
+ Pygments==2.18.0
121
+ pygraphviz==1.13
122
+ pyparsing==3.1.2
123
+ python-dateutil==2.9.0.post0
124
+ python-dotenv==1.0.1
125
+ python-json-logger==2.0.7
126
+ python-multipart==0.0.9
127
+ pytz==2024.1
128
+ PyYAML==6.0.1
129
+ pyzmq==26.0.3
130
+ referencing==0.35.1
131
+ requests==2.32.3
132
+ retrying==1.3.4
133
+ rfc3339-validator==0.1.4
134
+ rfc3986-validator==0.1.1
135
+ rich==13.7.1
136
+ rpds-py==0.18.1
137
+ ruff==0.4.9
138
+ scikit-learn==1.5.0
139
+ scipy==1.13.1
140
+ semantic-version==2.10.0
141
+ Send2Trash==1.8.3
142
+ shellingham==1.5.4
143
+ simplejson==3.19.2
144
+ six==1.16.0
145
+ sniffio==1.3.1
146
+ sortedcontainers==2.4.0
147
+ soupsieve==2.5
148
+ stack-data==0.6.3
149
+ starlette==0.37.2
150
+ tenacity==8.3.0
151
+ terminado==0.18.1
152
+ threadpoolctl==3.5.0
153
+ tinycss2==1.3.0
154
+ tomli==2.0.1
155
+ tomlkit==0.12.0
156
+ toolz==0.12.1
157
+ tornado==6.4.1
158
+ tqdm==4.66.4
159
+ traitlets==5.14.3
160
+ typer==0.12.3
161
+ types-python-dateutil==2.9.0.20240316
162
+ typing_extensions==4.12.2
163
+ tzdata==2024.1
164
+ ujson==5.10.0
165
+ uri-template==1.3.0
166
+ urllib3==2.2.1
167
+ uvicorn==0.30.1
168
+ uvloop==0.19.0
169
+ watchfiles==0.22.0
170
+ wcwidth==0.2.13
171
+ webcolors==24.6.0
172
+ webencodings==0.5.1
173
+ websocket-client==1.8.0
174
+ websockets==11.0.3
175
+ Werkzeug==3.0.3
176
+ zipp==3.19.2
sapp.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import importlib
3
+
4
+ import gradio as gr
5
+
6
def greet(name):
    """Return a greeting for ``name`` that also mentions the parent process name."""
    return "Hello {}!! a greeting from {}".format(name, get_process_name())
9
+
10
def get_process_name():
    """Return the name of the parent process, or a fallback string.

    Returns:
        The parent process name reported by psutil. ``"Unknown Process"`` is
        returned when psutil is not installed or the parent process cannot
        be inspected, so callers always receive a string.
    """
    fallback = "Unknown Process"
    try:
        # Import lazily: psutil is optional, and a plain ``import importlib``
        # does not guarantee that ``importlib.util`` is loaded for a find_spec probe.
        import psutil
    except ImportError:
        return fallback

    try:
        return str(psutil.Process(os.getppid()).name())
    except psutil.Error as exc:
        # psutil.Error is the base class of NoSuchProcess and psutil's other
        # exceptions; the parent may have exited or be inaccessible.
        print(type(exc).__name__)
        return fallback
20
+
21
+
22
if __name__ == "__main__":
    # Minimal text-in/text-out interface around ``greet``.
    gr.Interface(fn=greet, inputs="text", outputs="text").launch()