Spaces:
Sleeping
Sleeping
Add 'pm4py/' from commit '80970016c5e1e79af7c37df0dd88e17587fe7bcf'
Browse filesgit-subtree-dir: pm4py
git-subtree-mainline: 2c091203bedd9c2047dfb083690b2da242c2a0e8
git-subtree-split: 80970016c5e1e79af7c37df0dd88e17587fe7bcf
This view is limited to 50 files because it contains too many changes.
See raw diff
- pm4py/.gitattributes +37 -0
- pm4py/.github/workflows/codeql-analysis.yml +71 -0
- pm4py/.gitignore +139 -0
- pm4py/CHANGELOG.md +0 -0
- pm4py/Dockerfile +36 -0
- pm4py/MANIFEST.in +2 -0
- pm4py/README.md +62 -0
- pm4py/docs/.buildinfo +4 -0
- pm4py/docs/.nojekyll +0 -0
- pm4py/docs/LICENSE_HEADER_GITHUB.txt +16 -0
- pm4py/docs/Makefile +20 -0
- pm4py/docs/README +12 -0
- pm4py/docs/header_script.py +15 -0
- pm4py/docs/make.bat +36 -0
- pm4py/docs/source/.gitignore +1 -0
- pm4py/docs/source/_static/css/custom.css +7 -0
- pm4py/docs/source/api.rst +683 -0
- pm4py/docs/source/conf.py +171 -0
- pm4py/docs/source/examples.rst +11 -0
- pm4py/docs/source/getting_started.rst +410 -0
- pm4py/docs/source/index.rst +32 -0
- pm4py/docs/source/install.rst +32 -0
- pm4py/docs/source/modules.rst +7 -0
- pm4py/docs/source/pm4py-logo.png +0 -0
- pm4py/docs/source/release_notes.rst +114 -0
- pm4py/examples/CHECK_MISSING.py +9 -0
- pm4py/examples/activities_to_alphabet.py +13 -0
- pm4py/examples/activity_position.py +16 -0
- pm4py/examples/align_approx_pt.py +27 -0
- pm4py/examples/align_decomposition_ex_paper.py +99 -0
- pm4py/examples/align_decomposition_example.py +29 -0
- pm4py/examples/alignment_discounted_a_star.py +52 -0
- pm4py/examples/alignment_test.py +50 -0
- pm4py/examples/all_optimal_alignments.py +18 -0
- pm4py/examples/antialignments_and_precision.py +23 -0
- pm4py/examples/backwards_token_replay.py +18 -0
- pm4py/examples/batch_detection.py +18 -0
- pm4py/examples/bpmn_from_pt_conversion.py +26 -0
- pm4py/examples/bpmn_import_and_to_petri_net.py +20 -0
- pm4py/examples/bpmn_js_visualization.py +15 -0
- pm4py/examples/case_overlap_stat.py +15 -0
- pm4py/examples/consecutive_act_case_grouping_filter.py +13 -0
- pm4py/examples/corr_mining.py +40 -0
- pm4py/examples/cost_based_dfg.py +19 -0
- pm4py/examples/cycle_time.py +14 -0
- pm4py/examples/data_petri_nets.py +46 -0
- pm4py/examples/dataframe_prefix_and_fea_extraction.py +23 -0
- pm4py/examples/dec_treplay_imdf.py +35 -0
- pm4py/examples/decisiontree_align_example.py +28 -0
- pm4py/examples/decisiontree_trivial_example.py +45 -0
pm4py/.gitattributes
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.sqlite filter=lfs diff=lfs merge=lfs -text
|
37 |
+
*.xes.gz filter=lfs diff=lfs merge=lfs -text
|
pm4py/.github/workflows/codeql-analysis.yml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# For most projects, this workflow file will not need changing; you simply need
|
2 |
+
# to commit it to your repository.
|
3 |
+
#
|
4 |
+
# You may wish to alter this file to override the set of languages analyzed,
|
5 |
+
# or to provide custom queries or build logic.
|
6 |
+
#
|
7 |
+
# ******** NOTE ********
|
8 |
+
# We have attempted to detect the languages in your repository. Please check
|
9 |
+
# the `language` matrix defined below to confirm you have the correct set of
|
10 |
+
# supported CodeQL languages.
|
11 |
+
#
|
12 |
+
name: "CodeQL"
|
13 |
+
|
14 |
+
on:
|
15 |
+
push:
|
16 |
+
branches: [ release ]
|
17 |
+
pull_request:
|
18 |
+
# The branches below must be a subset of the branches above
|
19 |
+
branches: [ release ]
|
20 |
+
schedule:
|
21 |
+
- cron: '25 5 * * 4'
|
22 |
+
|
23 |
+
jobs:
|
24 |
+
analyze:
|
25 |
+
name: Analyze
|
26 |
+
runs-on: ubuntu-latest
|
27 |
+
permissions:
|
28 |
+
actions: read
|
29 |
+
contents: read
|
30 |
+
security-events: write
|
31 |
+
|
32 |
+
strategy:
|
33 |
+
fail-fast: false
|
34 |
+
matrix:
|
35 |
+
language: [ 'python' ]
|
36 |
+
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
|
37 |
+
# Learn more:
|
38 |
+
# https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
|
39 |
+
|
40 |
+
steps:
|
41 |
+
- name: Checkout repository
|
42 |
+
uses: actions/checkout@v2
|
43 |
+
|
44 |
+
# Initializes the CodeQL tools for scanning.
|
45 |
+
- name: Initialize CodeQL
|
46 |
+
uses: github/codeql-action/init@v1
|
47 |
+
with:
|
48 |
+
languages: ${{ matrix.language }}
|
49 |
+
# If you wish to specify custom queries, you can do so here or in a config file.
|
50 |
+
# By default, queries listed here will override any specified in a config file.
|
51 |
+
# Prefix the list here with "+" to use these queries and those in the config file.
|
52 |
+
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
53 |
+
|
54 |
+
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
55 |
+
# If this step fails, then you should remove it and run the build manually (see below)
|
56 |
+
- name: Autobuild
|
57 |
+
uses: github/codeql-action/autobuild@v1
|
58 |
+
|
59 |
+
# ℹ️ Command-line programs to run using the OS shell.
|
60 |
+
# 📚 https://git.io/JvXDl
|
61 |
+
|
62 |
+
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
63 |
+
# and modify them (or add more) to build your code if your project
|
64 |
+
# uses a compiled language
|
65 |
+
|
66 |
+
#- run: |
|
67 |
+
# make bootstrap
|
68 |
+
# make release
|
69 |
+
|
70 |
+
- name: Perform CodeQL Analysis
|
71 |
+
uses: github/codeql-action/analyze@v1
|
pm4py/.gitignore
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/.idea
|
2 |
+
.vscode/
|
3 |
+
|
4 |
+
*.mps
|
5 |
+
*.sol
|
6 |
+
*debug.log*
|
7 |
+
|
8 |
+
# Byte-compiled / optimized / DLL files
|
9 |
+
__pycache__/
|
10 |
+
*.py[cod]
|
11 |
+
*$py.class
|
12 |
+
|
13 |
+
# C extensions
|
14 |
+
*.so
|
15 |
+
|
16 |
+
# Distribution / packaging
|
17 |
+
.Python
|
18 |
+
build/
|
19 |
+
develop-eggs/
|
20 |
+
dist/
|
21 |
+
downloads/
|
22 |
+
eggs/
|
23 |
+
.eggs/
|
24 |
+
lib/
|
25 |
+
lib64/
|
26 |
+
parts/
|
27 |
+
sdist/
|
28 |
+
var/
|
29 |
+
wheels/
|
30 |
+
pip-wheel-metadata/
|
31 |
+
share/python-wheels/
|
32 |
+
*.egg-info/
|
33 |
+
.installed.cfg
|
34 |
+
*.egg
|
35 |
+
MANIFEST
|
36 |
+
|
37 |
+
# PyInstaller
|
38 |
+
# Usually these files are written by a python script from a template
|
39 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
40 |
+
*.manifest
|
41 |
+
*.spec
|
42 |
+
|
43 |
+
# Installer logs
|
44 |
+
pip-log.txt
|
45 |
+
pip-delete-this-directory.txt
|
46 |
+
|
47 |
+
# Unit test / coverage reports
|
48 |
+
htmlcov/
|
49 |
+
.tox/
|
50 |
+
.nox/
|
51 |
+
.coverage
|
52 |
+
.coverage.*
|
53 |
+
.cache
|
54 |
+
nosetests.xml
|
55 |
+
coverage.xml
|
56 |
+
*.cover
|
57 |
+
*.py,cover
|
58 |
+
.hypothesis/
|
59 |
+
.pytest_cache/
|
60 |
+
|
61 |
+
# Translations
|
62 |
+
*.mo
|
63 |
+
*.pot
|
64 |
+
|
65 |
+
# Django stuff:
|
66 |
+
*.log
|
67 |
+
local_settings.py
|
68 |
+
db.sqlite3
|
69 |
+
db.sqlite3-journal
|
70 |
+
|
71 |
+
# Flask stuff:
|
72 |
+
instance/
|
73 |
+
.webassets-cache
|
74 |
+
|
75 |
+
# Scrapy stuff:
|
76 |
+
.scrapy
|
77 |
+
|
78 |
+
# Sphinx documentation
|
79 |
+
docs/_build/
|
80 |
+
|
81 |
+
# PyBuilder
|
82 |
+
target/
|
83 |
+
|
84 |
+
# Jupyter Notebook
|
85 |
+
.ipynb_checkpoints
|
86 |
+
|
87 |
+
# IPython
|
88 |
+
profile_default/
|
89 |
+
ipython_config.py
|
90 |
+
|
91 |
+
# pyenv
|
92 |
+
.python-version
|
93 |
+
|
94 |
+
# pipenv
|
95 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
96 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
97 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
98 |
+
# install all needed dependencies.
|
99 |
+
#Pipfile.lock
|
100 |
+
|
101 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
102 |
+
__pypackages__/
|
103 |
+
|
104 |
+
# Celery stuff
|
105 |
+
celerybeat-schedule
|
106 |
+
celerybeat.pid
|
107 |
+
|
108 |
+
# SageMath parsed files
|
109 |
+
*.sage.py
|
110 |
+
|
111 |
+
# Environments
|
112 |
+
.env
|
113 |
+
.venv
|
114 |
+
env/
|
115 |
+
venv/
|
116 |
+
ENV/
|
117 |
+
env.bak/
|
118 |
+
venv.bak/
|
119 |
+
|
120 |
+
# Spyder project settings
|
121 |
+
.spyderproject
|
122 |
+
.spyproject
|
123 |
+
|
124 |
+
# Rope project settings
|
125 |
+
.ropeproject
|
126 |
+
|
127 |
+
# mkdocs documentation
|
128 |
+
/site
|
129 |
+
|
130 |
+
# mypy
|
131 |
+
.mypy_cache/
|
132 |
+
.dmypy.json
|
133 |
+
dmypy.json
|
134 |
+
|
135 |
+
# Pyre type checker
|
136 |
+
.pyre/
|
137 |
+
|
138 |
+
# Renovate bot
|
139 |
+
renovate.json5
|
pm4py/CHANGELOG.md
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pm4py/Dockerfile
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.12.4-bookworm
|
2 |
+
|
3 |
+
RUN apt-get update
|
4 |
+
RUN apt-get -y upgrade
|
5 |
+
RUN apt-get -y install aptitude locate apt-file nano vim git zip unzip wget graphviz curl gnupg gnupg2 tini iputils-ping unixodbc-dev
|
6 |
+
RUN apt-get -y install gcc g++ flex bison pkg-config automake autoconf cmake
|
7 |
+
RUN apt-get -y install python3-dev python3-pydot python3-tk
|
8 |
+
RUN apt-get -y install libopenblas-dev liblapack-dev libboost-all-dev libncurses5-dev libtool libssl-dev libjemalloc-dev libboost-dev libboost-filesystem-dev libboost-system-dev libboost-regex-dev libxml2-dev libxslt-dev libfreetype6-dev libsuitesparse-dev libclang-16-dev llvm-16-dev libthrift-dev libfftw3-dev
|
9 |
+
RUN python3 -m pip install --upgrade pip
|
10 |
+
RUN pip3 install deprecation==2.1.0 graphviz==0.20.3 intervaltree==3.1.0 networkx==3.3 packaging==24.1 python-dateutil==2.9.0.post0 pytz==2024.1 setuptools==70.1.1 six==1.16.0 sortedcontainers==2.4.0 tzdata==2024.1 wheel==0.43.0
|
11 |
+
RUN pip3 install colorama==0.4.6 cycler==0.12.1 pydotplus==2.0.2 pyparsing==3.1.2 tqdm==4.66.4
|
12 |
+
RUN pip3 install lxml==5.2.2 numpy==2.0.0 pandas==2.2.2 scipy==1.14.0
|
13 |
+
RUN pip3 install contourpy==1.2.1 fonttools==4.53.0 kiwisolver==1.4.5 matplotlib==3.9.0 pillow==10.4.0
|
14 |
+
RUN pip3 install anyio==4.4.0 asttokens==2.4.1 attrs==23.2.0 certifi==2024.6.2 charset-normalizer==3.3.2 convertdate==2.4.0 decorator==5.1.1 distro==1.9.0 executing==2.0.1 h11==0.14.0 httpcore==1.0.5 httpx==0.27.0 idna==3.7 ipython==8.26.0 jedi==0.19.1 Jinja2==3.1.4 jsonpickle==3.2.2 jsonschema-specifications==2023.12.1 lunardate==0.2.2 MarkupSafe==2.1.5 matplotlib-inline==0.1.7 parso==0.8.4 prompt-toolkit==3.0.47 pure-eval==0.2.2 pydantic==2.7.4 Pygments==2.18.0 pyluach==2.2.0 PyMeeus==0.5.12 referencing==0.35.1 rpds-py==0.18.1 sniffio==1.3.1 stack-data==0.6.3 traitlets==5.14.3 typing_extensions==4.12.2 urllib3==2.2.2 wcwidth==0.2.13
|
15 |
+
RUN pip3 install jsonschema==4.22.0 openai==1.35.7 pyvis==0.3.2 requests==2.32.3 workalendar==17.0.0
|
16 |
+
RUN pip3 install -U meson-python==0.15.0 Cython==3.0.10 ninja==1.11.1.1 spin==0.8 build==1.2.1 setuptools_scm==8.0.4
|
17 |
+
|
18 |
+
#RUN cd / && git clone https://github.com/numpy/numpy.git && cd /numpy && git submodule update --init && pip3 install .
|
19 |
+
#RUN cd / && git clone https://github.com/pandas-dev/pandas.git && cd /pandas && pip3 install .
|
20 |
+
#RUN cd / && git clone https://github.com/scipy/scipy.git && cd /scipy && git submodule update --init && pip3 install .
|
21 |
+
#RUN cd / && git clone https://github.com/lxml/lxml.git && cd /lxml && pip3 install .
|
22 |
+
#RUN cd / && git clone https://github.com/matplotlib/matplotlib.git && cd /matplotlib && pip3 install .
|
23 |
+
#RUN cd / && git clone https://github.com/duckdb/duckdb.git && cd /duckdb && make && cd /duckdb/tools/pythonpkg && pip3 install .
|
24 |
+
#RUN cd / && git clone https://github.com/apache/arrow.git && export ARROW_HOME=/dist && export LD_LIBRARY_PATH=/dist/lib:$LD_LIBRARY_PATH && export CMAKE_PREFIX_PATH=$ARROW_HOME:$CMAKE_PREFIX_PATH && cd /arrow/ && mkdir cpp/build && cd cpp/build && cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME -DCMAKE_INSTALL_LIBDIR=lib -DCMAKE_BUILD_TYPE=Debug -DARROW_BUILD_TESTS=ON -DARROW_COMPUTE=ON -DARROW_CSV=ON -DARROW_DATASET=ON -DARROW_FILESYSTEM=ON -DARROW_HDFS=ON -DARROW_JSON=ON -DARROW_PARQUET=ON -DARROW_WITH_BROTLI=ON -DARROW_WITH_BZ2=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_SNAPPY=ON -DARROW_WITH_ZLIB=ON -DARROW_WITH_ZSTD=ON -DPARQUET_REQUIRE_ENCRYPTION=ON .. && make -j4 && make install && cd /arrow/python && export PYARROW_WITH_PARQUET=1 && export PYARROW_WITH_DATASET=1 && export PYARROW_PARALLEL=4 && python3 setup.py build_ext --inplace && python3 setup.py install
|
25 |
+
#RUN cd / && git clone https://github.com/python-greenlet/greenlet && cd /greenlet && pip3 install .
|
26 |
+
#RUN cd / && git clone https://github.com/sqlalchemy/sqlalchemy.git && cd /sqlalchemy && pip3 install .
|
27 |
+
#RUN cd / && git clone https://github.com/mkleehammer/pyodbc.git && cd /pyodbc && pip3 install .
|
28 |
+
|
29 |
+
#RUN cd / && git clone https://github.com/scikit-learn/scikit-learn.git && cd /scikit-learn && pip3 install .
|
30 |
+
#RUN cd / && git clone https://github.com/chuanconggao/PrefixSpan-py.git && cd /PrefixSpan-py && pip3 install .
|
31 |
+
#RUN cd / && git clone https://github.com/wmayner/pyemd.git && cd /pyemd && pip3 install .
|
32 |
+
#RUN cd / && wget https://ftp.gnu.org/gnu/glpk/glpk-5.0.tar.gz && tar xzvf glpk-5.0.tar.gz && cd /glpk-5.0 && ./configure && make && make install
|
33 |
+
#RUN cd / && git clone https://github.com/cvxopt/cvxopt.git && cd /cvxopt && sed -i 's/BUILD_GLPK = 0/BUILD_GLPK = 1/' setup.py && python3 setup.py build && python3 setup.py install
|
34 |
+
|
35 |
+
COPY . /app
|
36 |
+
RUN cd /app && pip3 install --no-deps .
|
pm4py/MANIFEST.in
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
recursive-include pm4py *.svg
|
2 |
+
recursive-include pm4py *.html
|
pm4py/README.md
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# pm4py
|
2 |
+
pm4py is a python library that supports (state-of-the-art) process mining algorithms in python.
|
3 |
+
It is open source (licensed under GPL) and intended to be used in both academia and industry projects.
|
4 |
+
pm4py is a product of the Fraunhofer Institute for Applied Information Technology.
|
5 |
+
|
6 |
+
## Documentation / API
|
7 |
+
The full documentation of pm4py can be found at https://pm4py.fit.fraunhofer.de
|
8 |
+
|
9 |
+
## First Example
|
10 |
+
A very simple example, to whet your appetite:
|
11 |
+
|
12 |
+
```python
|
13 |
+
import pm4py
|
14 |
+
|
15 |
+
if __name__ == "__main__":
|
16 |
+
log = pm4py.read_xes('<path-to-xes-log-file.xes>')
|
17 |
+
net, initial_marking, final_marking = pm4py.discover_petri_net_inductive(log)
|
18 |
+
pm4py.view_petri_net(net, initial_marking, final_marking, format="svg")
|
19 |
+
```
|
20 |
+
|
21 |
+
## Installation
|
22 |
+
pm4py can be installed on Python 3.9.x / 3.10.x / 3.11.x / 3.12.x by invoking:
|
23 |
+
*pip install -U pm4py*
|
24 |
+
|
25 |
+
pm4py is also running on older Python environments with different requirements sets, including:
|
26 |
+
- Python 3.8 (3.8.10): third_party/old_python_deps/requirements_py38.txt
|
27 |
+
|
28 |
+
## Requirements
|
29 |
+
pm4py depends on some other Python packages, with different levels of importance:
|
30 |
+
* *Essential requirements*: numpy, pandas, deprecation, networkx
|
31 |
+
* *Normal requirements* (installed by default with the pm4py package, important for mainstream usage): graphviz, intervaltree, lxml, matplotlib, pydotplus, pytz, scipy, tqdm
|
32 |
+
* *Optional requirements* (not installed by default): requests, pyvis, jsonschema, workalendar, pyarrow, scikit-learn, polars, openai, pyemd, pyaudio, pydub, pygame, pywin32, pygetwindow, pynput
|
33 |
+
|
34 |
+
## Release Notes
|
35 |
+
To track the incremental updates, please refer to the *CHANGELOG* file.
|
36 |
+
|
37 |
+
## Third Party Dependencies
|
38 |
+
As scientific library in the Python ecosystem, we rely on external libraries to offer our features.
|
39 |
+
In the */third_party* folder, we list all the licenses of our direct dependencies.
|
40 |
+
Please check the */third_party/LICENSES_TRANSITIVE* file to get a full list of all transitive dependencies and the corresponding license.
|
41 |
+
|
42 |
+
## Citing pm4py
|
43 |
+
If you are using pm4py in your scientific work, please cite pm4py as follows:
|
44 |
+
|
45 |
+
**Alessandro Berti, Sebastiaan van Zelst, Daniel Schuster**. (2023). *PM4Py: A process mining library for Python*. Software Impacts, 17, 100556. [DOI](https://doi.org/10.1016/j.simpa.2023.100556) | [Article Link](https://www.sciencedirect.com/science/article/pii/S2665963823000933)
|
46 |
+
|
47 |
+
BiBTeX:
|
48 |
+
|
49 |
+
```bibtex
|
50 |
+
@article{pm4py,
|
51 |
+
title = {PM4Py: A process mining library for Python},
|
52 |
+
journal = {Software Impacts},
|
53 |
+
volume = {17},
|
54 |
+
pages = {100556},
|
55 |
+
year = {2023},
|
56 |
+
issn = {2665-9638},
|
57 |
+
doi = {https://doi.org/10.1016/j.simpa.2023.100556},
|
58 |
+
url = {https://www.sciencedirect.com/science/article/pii/S2665963823000933},
|
59 |
+
author = {Alessandro Berti and Sebastiaan van Zelst and Daniel Schuster},
|
60 |
+
}
|
61 |
+
```
|
62 |
+
|
pm4py/docs/.buildinfo
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Sphinx build info version 1
|
2 |
+
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
|
3 |
+
config: 5eb7f31d57fb2354f38aa0b1fbc31deb
|
4 |
+
tags: 645f666f9bcd5a90fca523b33c5a78b7
|
pm4py/docs/.nojekyll
ADDED
File without changes
|
pm4py/docs/LICENSE_HEADER_GITHUB.txt
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
|
3 |
+
|
4 |
+
PM4Py is free software: you can redistribute it and/or modify
|
5 |
+
it under the terms of the GNU General Public License as published by
|
6 |
+
the Free Software Foundation, either version 3 of the License, or
|
7 |
+
(at your option) any later version.
|
8 |
+
|
9 |
+
PM4Py is distributed in the hope that it will be useful,
|
10 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
12 |
+
GNU General Public License for more details.
|
13 |
+
|
14 |
+
You should have received a copy of the GNU General Public License
|
15 |
+
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
|
16 |
+
'''
|
pm4py/docs/Makefile
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Minimal makefile for Sphinx documentation
|
2 |
+
#
|
3 |
+
|
4 |
+
# You can set these variables from the command line.
|
5 |
+
SPHINXOPTS =
|
6 |
+
SPHINXBUILD = sphinx-build
|
7 |
+
SPHINXPROJ = pm4py
|
8 |
+
SOURCEDIR = source
|
9 |
+
BUILDDIR = build
|
10 |
+
|
11 |
+
# Put it first so that "make" without argument is like "make help".
|
12 |
+
help:
|
13 |
+
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
14 |
+
|
15 |
+
.PHONY: help Makefile
|
16 |
+
|
17 |
+
# Catch-all target: route all unknown targets to Sphinx using the new
|
18 |
+
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
19 |
+
%: Makefile
|
20 |
+
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
pm4py/docs/README
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
This document describes how to generate API documentation using sphinx.
|
2 |
+
|
3 |
+
First, install sphinx (we assume you are able to do this via the official sphinx website).
|
4 |
+
|
5 |
+
To generate the docs perform the following steps.
|
6 |
+
1. First create all the actuall .rst files that define all the documentation.
|
7 |
+
In cmd/terminal browse to the docs folder (this folder ;-)) and run:
|
8 |
+
> sphinx-apidoc --module-first -o source ../
|
9 |
+
You can optionally opt to choose to remove all the .rst files from the source folder (except for index.rst!!!!)
|
10 |
+
|
11 |
+
2. Generate corresponding html files:
|
12 |
+
make html
|
pm4py/docs/header_script.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import glob
|
2 |
+
|
3 |
+
if __name__ == '__main__':
|
4 |
+
LICENSE_HEADER_FILE_PATH = r'LICENSE_HEADER_GITHUB.txt'
|
5 |
+
with open(LICENSE_HEADER_FILE_PATH, 'r') as license_file:
|
6 |
+
license = license_file.read()
|
7 |
+
for filename in glob.iglob('../pm4py/' + '**/*.py', recursive=True):
|
8 |
+
with open(filename, 'r', encoding='utf-8') as original:
|
9 |
+
data = original.read()
|
10 |
+
if (data.find(license) == -1):
|
11 |
+
with open(filename, 'w', encoding='utf-8') as modified:
|
12 |
+
print('adding license to: ' + filename)
|
13 |
+
modified.write(license + '\n' + data)
|
14 |
+
else:
|
15 |
+
print('skipping: ' + filename)
|
pm4py/docs/make.bat
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
@ECHO OFF
|
2 |
+
|
3 |
+
pushd %~dp0
|
4 |
+
|
5 |
+
REM Command file for Sphinx documentation
|
6 |
+
|
7 |
+
if "%SPHINXBUILD%" == "" (
|
8 |
+
set SPHINXBUILD=sphinx-build
|
9 |
+
)
|
10 |
+
set SOURCEDIR=source
|
11 |
+
set BUILDDIR=build
|
12 |
+
set SPHINXPROJ=pm4py
|
13 |
+
|
14 |
+
if "%1" == "" goto help
|
15 |
+
|
16 |
+
%SPHINXBUILD% >NUL 2>NUL
|
17 |
+
if errorlevel 9009 (
|
18 |
+
echo.
|
19 |
+
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
20 |
+
echo.installed, then set the SPHINXBUILD environment variable to point
|
21 |
+
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
22 |
+
echo.may add the Sphinx directory to PATH.
|
23 |
+
echo.
|
24 |
+
echo.If you don't have Sphinx installed, grab it from
|
25 |
+
echo.http://sphinx-doc.org/
|
26 |
+
exit /b 1
|
27 |
+
)
|
28 |
+
|
29 |
+
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
|
30 |
+
goto end
|
31 |
+
|
32 |
+
:help
|
33 |
+
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
|
34 |
+
|
35 |
+
:end
|
36 |
+
popd
|
pm4py/docs/source/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
generated/*
|
pm4py/docs/source/_static/css/custom.css
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.bd-header {
|
2 |
+
display: none;
|
3 |
+
}
|
4 |
+
|
5 |
+
.bd-footer {
|
6 |
+
display: none;
|
7 |
+
}
|
pm4py/docs/source/api.rst
ADDED
@@ -0,0 +1,683 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
API Reference
|
2 |
+
=============
|
3 |
+
This page provides an overview of all public ``pm4py`` objects, functions and methods.
|
4 |
+
|
5 |
+
Input (:mod:`pm4py.read`)
|
6 |
+
---------------------------------
|
7 |
+
``pm4py`` supports importing the following standardized *event data* format:
|
8 |
+
|
9 |
+
* ``.xes`` files (`xes-standard <https://xes-standard.org/>`_); General interchange format for event data. :meth:`pm4py.read.read_xes`
|
10 |
+
|
11 |
+
In case an event log is stored as a ``.csv`` file, ``pandas`` can be used to directly import the event log as a ``data frame`` (`docs <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_).
|
12 |
+
``.xes`` files are internally converted to a ``pandas dataframe``, which is the default data structure used by all algorithms implemented in ``pm4py``.
|
13 |
+
|
14 |
+
Additional file formats that are currently supported by pm4py are:
|
15 |
+
|
16 |
+
* ``.bpmn`` files; File format specifying process models in the *BPMN* process modeling formalism :meth:`pm4py.read.read_bpmn`
|
17 |
+
* ``.dfg`` files; File format specifying *directly follows graphs* (also referred to as *process maps*) :meth:`pm4py.read.read_dfg`
|
18 |
+
* ``.pnml`` files; File format specifying *Petri net* models :meth:`pm4py.read.read_pnml`
|
19 |
+
* ``.ptml`` files; File format specifying *Process Tree* models :meth:`pm4py.read.read_ptml`
|
20 |
+
|
21 |
+
Importing object-centric event logs is possible given the following formats:
|
22 |
+
|
23 |
+
* ``.csv`` specification :meth:`pm4py.read.read_ocel_csv`
|
24 |
+
* ``.jsonocel`` specification :meth:`pm4py.read.read_ocel_json`
|
25 |
+
* ``.xmlocel`` specification :meth:`pm4py.read.read_ocel_xml`
|
26 |
+
* ``.sqlite`` specification :meth:`pm4py.read.read_ocel_sqlite`
|
27 |
+
|
28 |
+
Importing object-centric event logs (OCEL2.0) is possible given the following formats:
|
29 |
+
|
30 |
+
* ``.xmlocel`` specification :meth:`pm4py.read.read_ocel2_xml`
|
31 |
+
* ``.sqlite`` specification :meth:`pm4py.read.read_ocel2_sqlite`
|
32 |
+
* ``.jsonocel`` specification :meth:`pm4py.read.read_ocel2_json`
|
33 |
+
|
34 |
+
|
35 |
+
Output (:mod:`pm4py.write`)
|
36 |
+
-------------------------------------
|
37 |
+
Similarly to event data importing, ``pm4py`` supports export functionalities to:
|
38 |
+
|
39 |
+
* ``.bpmn`` files, :meth:`pm4py.write.write_bpmn`
|
40 |
+
* ``.dfg`` files, :meth:`pm4py.write.write_dfg`
|
41 |
+
* ``.pnml`` files, :meth:`pm4py.write.write_pnml`
|
42 |
+
* ``.ptml`` files, :meth:`pm4py.write.write_ptml`
|
43 |
+
* ``.xes`` files. :meth:`pm4py.write.write_xes`
|
44 |
+
|
45 |
+
Exporting object-centric event logs is possible to the following formats:
|
46 |
+
|
47 |
+
* ``.csv`` specification :meth:`pm4py.write.write_ocel_csv`
|
48 |
+
* ``.jsonocel`` specification :meth:`pm4py.write.write_ocel_json`
|
49 |
+
* ``.xmlocel`` specification :meth:`pm4py.write.write_ocel_xml`
|
50 |
+
* ``.sqlite`` specification :meth:`pm4py.write.write_ocel_sqlite`
|
51 |
+
|
52 |
+
Exporting object-centric event logs (OCEL2.0) is possible to the following formats:
|
53 |
+
|
54 |
+
* ``.xmlocel`` specification :meth:`pm4py.write.write_ocel2_xml`
|
55 |
+
* ``.sqlite`` specification :meth:`pm4py.write.write_ocel2_sqlite`
|
56 |
+
* ``.jsonocel`` specification :meth:`pm4py.write.write_ocel2_json`
|
57 |
+
|
58 |
+
|
59 |
+
Conversion (:mod:`pm4py.convert`)
|
60 |
+
-------------------------------------
|
61 |
+
Several conversions are available from/to different objects supported by ``pm4py``.
|
62 |
+
The following conversions are currently available:
|
63 |
+
|
64 |
+
* :meth:`pm4py.convert.convert_to_bpmn` converts a process model to BPMN
|
65 |
+
* :meth:`pm4py.convert.convert_to_petri_net` converts a process model to Petri net
|
66 |
+
* :meth:`pm4py.convert.convert_to_process_tree` converts a process model to a process tree
|
67 |
+
* :meth:`pm4py.convert.convert_to_reachability_graph` converts a process model to a reachability graph
|
68 |
+
* :meth:`pm4py.convert.convert_log_to_ocel` converts an event log to an object-centric event log
|
69 |
+
* :meth:`pm4py.convert.convert_log_to_networkx` converts a traditional event log (dataframe) to a directed graph (NetworkX)
|
70 |
+
* :meth:`pm4py.convert.convert_ocel_to_networkx` converts an object-centric event log to a directed graph (NetworkX)
|
71 |
+
* :meth:`pm4py.convert.convert_petri_net_to_networkx` converts an accepting Petri net to a directed graph (NetworkX)
|
72 |
+
* :meth:`pm4py.convert.convert_petri_net_type` changes the Petri net internal type
|
73 |
+
|
74 |
+
|
75 |
+
Process Discovery (:mod:`pm4py.discovery`)
|
76 |
+
------------------------------------------
|
77 |
+
Process Discovery algorithms discover a process model that describes the process execution, as stored in the event log.
|
78 |
+
``pm4py`` implements a variety of different process discovery algorithms.
|
79 |
+
These different algorithms return different kinds of models, i.e., models with *imprecise execution semantics*, *procedural process models* and *declarative process models*.
|
80 |
+
Among the models with *imprecise execution semantics*, ``pm4py`` currently supports:
|
81 |
+
|
82 |
+
* :meth:`pm4py.discovery.discover_dfg`; discovers a *directly follows graph* annotated with frequency information (based on the log).
|
83 |
+
* :meth:`pm4py.discovery.discover_performance_dfg`; discovers a *directly follows graph* annotated with performance information (based on the log).
|
84 |
+
|
85 |
+
Among *procedural process models*, ``pm4py`` currently supports:
|
86 |
+
|
87 |
+
* :meth:`pm4py.discovery.discover_petri_net_alpha`; discovers a *Petri net* using the Alpha Miner algorithm.
|
88 |
+
* :meth:`pm4py.discovery.discover_petri_net_inductive`; discovers a *Petri net* using the Inductive Miner algorithm.
|
89 |
+
* :meth:`pm4py.discovery.discover_petri_net_heuristics`; discovers a *Petri net* using the Heuristics Miner algorithm.
|
90 |
+
* :meth:`pm4py.discovery.discover_petri_net_ilp`; discovers a *Petri net* using the ILP Miner algorithm.
|
91 |
+
* :meth:`pm4py.discovery.discover_process_tree_inductive`; discovers a *process tree* using the Inductive Miner algorithm.
|
92 |
+
* :meth:`pm4py.discovery.discover_bpmn_inductive`; discovers a *BPMN model* using the Inductive Miner algorithm.
|
93 |
+
* :meth:`pm4py.discovery.discover_heuristics_net`; discovers a *heuristics net* using the Heuristics Miner algorithm.
|
94 |
+
* :meth:`pm4py.discovery.discover_footprints`; discovers the *footprints matrix* of the log or the model.
|
95 |
+
* :meth:`pm4py.discovery.discover_powl`; discovers a *partial order workflow language* (POWL) model.
|
96 |
+
|
97 |
+
Among *declarative process models*, ``pm4py`` currently supports:
|
98 |
+
|
99 |
+
* :meth:`pm4py.discovery.discover_declare`; discovers a *DECLARE* model.
|
100 |
+
* :meth:`pm4py.discovery.discover_log_skeleton`; discovers a *log skeleton*.
|
101 |
+
* :meth:`pm4py.discovery.discover_temporal_profile`; discovers a *temporal profile*.
|
102 |
+
|
103 |
+
|
104 |
+
Conformance Checking (:mod:`pm4py.conformance`)
|
105 |
+
-----------------------------------------------
|
106 |
+
Conformance checking techniques compare a process model with an event log of the same process. The goal is to check if the event log conforms to the model, and, vice versa.
|
107 |
+
Among procedural process models, ``pm4py`` currently supports:
|
108 |
+
|
109 |
+
* :meth:`pm4py.conformance.conformance_diagnostics_token_based_replay`; token-based replay between the event log and a *Petri net*.
|
110 |
+
* :meth:`pm4py.conformance.conformance_diagnostics_alignments`; alignment-based replay between the event log and a *Petri net*.
|
111 |
+
* :meth:`pm4py.conformance.conformance_diagnostics_footprints`; footprints-based conformance diagnostics.
|
112 |
+
* :meth:`pm4py.conformance.fitness_token_based_replay`; evaluation of the fitness between an event log and a *Petri net* using token-based replay.
|
113 |
+
* :meth:`pm4py.conformance.fitness_alignments`; evaluation of the fitness between an event log and a *Petri net* using alignments.
|
114 |
+
* :meth:`pm4py.conformance.fitness_footprints`; evaluation of the fitness based on footprints.
|
115 |
+
* :meth:`pm4py.conformance.precision_token_based_replay`; evaluation of the precision between an event log and a *Petri net* using token-based replay.
|
116 |
+
* :meth:`pm4py.conformance.precision_alignments`; evaluation of the precision between an event log and a *Petri net* using alignments.
|
117 |
+
* :meth:`pm4py.conformance.precision_footprints`; evaluation of the precision based on footprints.
|
118 |
+
* :meth:`pm4py.conformance.replay_prefix_tbr`; replays a prefix (list of activities) on a given *Petri net*, using Token-Based Replay.
|
119 |
+
|
120 |
+
Among declarative process models, ``pm4py`` currently supports:
|
121 |
+
|
122 |
+
* :meth:`pm4py.conformance.conformance_log_skeleton`; conformance checking using the *log skeleton*.
|
123 |
+
* :meth:`pm4py.conformance.conformance_declare`; conformance checking using a *DECLARE model*.
|
124 |
+
* :meth:`pm4py.conformance.conformance_temporal_profile`; conformance checking using the *temporal profile*.
|
125 |
+
|
126 |
+
|
127 |
+
Visualization (:mod:`pm4py.vis`)
|
128 |
+
------------------------------------------
|
129 |
+
The ``pm4py`` library implements basic visualizations of process models and statistics.
|
130 |
+
Among the on-screen visualizations, ``pm4py`` currently supports:
|
131 |
+
|
132 |
+
* :meth:`pm4py.vis.view_petri_net`; views a *Petri net* model.
|
133 |
+
* :meth:`pm4py.vis.view_dfg`; views a *directly-follows graph* annotated with the frequency.
|
134 |
+
* :meth:`pm4py.vis.view_performance_dfg`; views a *directly-follows graph* annotated with the performance.
|
135 |
+
* :meth:`pm4py.vis.view_process_tree`; views a *process tree*.
|
136 |
+
* :meth:`pm4py.vis.view_bpmn`; views a *BPMN model*.
|
137 |
+
* :meth:`pm4py.vis.view_heuristics_net`; views a *heuristics net*.
|
138 |
+
* :meth:`pm4py.vis.view_dotted_chart`; views a *dotted chart*
|
139 |
+
* :meth:`pm4py.vis.view_sna`; views the results of a *social network analysis*.
|
140 |
+
* :meth:`pm4py.vis.view_case_duration_graph`; views the *case duration graph*.
|
141 |
+
* :meth:`pm4py.vis.view_events_per_time_graph`; views the *events per time graph*.
|
142 |
+
* :meth:`pm4py.vis.view_performance_spectrum`; views the *performance spectrum*.
|
143 |
+
* :meth:`pm4py.vis.view_events_distribution_graph`; views the *events distribution graph*.
|
144 |
+
* :meth:`pm4py.vis.view_ocdfg`; views an *object-centric directly-follows graph*.
|
145 |
+
* :meth:`pm4py.vis.view_ocpn`; views an *object-centric Petri net*.
|
146 |
+
* :meth:`pm4py.vis.view_object_graph`; views an *object-based graph*.
|
147 |
+
* :meth:`pm4py.vis.view_network_analysis`; views the results of a *network analysis*.
|
148 |
+
* :meth:`pm4py.vis.view_transition_system`; views the results of a *transition system*.
|
149 |
+
* :meth:`pm4py.vis.view_prefix_tree`; views a *prefix tree*.
|
150 |
+
* :meth:`pm4py.vis.view_alignments`; views the *alignments table*.
|
151 |
+
* :meth:`pm4py.vis.view_footprints`; views a *footprints table*.
|
152 |
+
* :meth:`pm4py.vis.view_powl`; views a *POWL model*.
|
153 |
+
|
154 |
+
We offer also some methods to store the visualizations on the disk:
|
155 |
+
|
156 |
+
* :meth:`pm4py.vis.save_vis_petri_net`; saves the visualization of a *Petri net* model.
|
157 |
+
* :meth:`pm4py.vis.save_vis_dfg`; saves the visualization of a *directly-follows graph* annotated with the frequency.
|
158 |
+
* :meth:`pm4py.vis.save_vis_performance_dfg`; saves the visualization of a *directly-follows graph* annotated with the performance.
|
159 |
+
* :meth:`pm4py.vis.save_vis_process_tree`; saves the visualization of a *process tree*.
|
160 |
+
* :meth:`pm4py.vis.save_vis_bpmn`; saves the visualization of a *BPMN model*.
|
161 |
+
* :meth:`pm4py.vis.save_vis_heuristics_net`; saves the visualization of a *heuristics net*.
|
162 |
+
* :meth:`pm4py.vis.save_vis_dotted_chart`; saves the visualization of a *dotted chart*
|
163 |
+
* :meth:`pm4py.vis.save_vis_sna`; saves the visualization of the results of a *social network analysis*.
|
164 |
+
* :meth:`pm4py.vis.save_vis_case_duration_graph`; saves the visualization of the *case duration graph*.
|
165 |
+
* :meth:`pm4py.vis.save_vis_events_per_time_graph`; saves the visualization of the *events per time graph*.
|
166 |
+
* :meth:`pm4py.vis.save_vis_performance_spectrum`; saves the visualization of the *performance spectrum*.
|
167 |
+
* :meth:`pm4py.vis.save_vis_events_distribution_graph`; saves the visualization of the *events distribution graph*.
|
168 |
+
* :meth:`pm4py.vis.save_vis_ocdfg`; saves the visualization of an *object-centric directly-follows graph*.
|
169 |
+
* :meth:`pm4py.vis.save_vis_ocpn`; saves the visualization of an *object-centric Petri net*.
|
170 |
+
* :meth:`pm4py.vis.save_vis_object_graph`; saves the visualization of an *object-based graph*.
|
171 |
+
* :meth:`pm4py.vis.save_vis_network_analysis`; saves the visualization of the results of a *network analysis*.
|
172 |
+
* :meth:`pm4py.vis.save_vis_transition_system`; saves the visualization of the results of a *transition system*.
|
173 |
+
* :meth:`pm4py.vis.save_vis_prefix_tree`; saves the visualization of a *prefix tree*.
|
174 |
+
* :meth:`pm4py.vis.save_vis_alignments`; saves the visualization of the *alignments table*.
|
175 |
+
* :meth:`pm4py.vis.save_vis_footprints`; saves the visualization of the *footprints table*.
|
176 |
+
* :meth:`pm4py.vis.save_vis_powl`; saves the visualization of a *POWL model*.
|
177 |
+
|
178 |
+
|
179 |
+
Statistics (:mod:`pm4py.stats`)
|
180 |
+
------------------------------------------
|
181 |
+
Different statistics that could be computed on top of event logs are proposed, including:
|
182 |
+
|
183 |
+
* :meth:`pm4py.stats.get_start_activities`; gets the *start activities* from the event log.
|
184 |
+
* :meth:`pm4py.stats.get_end_activities`; gets the *end activities* from the event log.
|
185 |
+
* :meth:`pm4py.stats.get_event_attributes`; gets the *attributes at the event level* of the event log.
|
186 |
+
* :meth:`pm4py.stats.get_trace_attributes`; gets the *attributes at the trace level* of the event log.
|
187 |
+
* :meth:`pm4py.stats.get_event_attribute_values`; gets the values of an *attribute at the event level* of the event log.
|
188 |
+
* :meth:`pm4py.stats.get_trace_attribute_values`; gets the values of an *attribute at the trace level* of the event log.
|
189 |
+
* :meth:`pm4py.stats.get_variants`; gets the *variants* of the event log.
|
190 |
+
* :meth:`pm4py.stats.split_by_process_variant`; splits an event log into sub-dataframes for each process variant.
|
191 |
+
* :meth:`pm4py.stats.get_variants_paths_duration`; method that associates to a log object a Pandas dataframe aggregated by variants and positions (inside the variant).
|
192 |
+
* :meth:`pm4py.stats.get_frequent_trace_segments`; gets the *traces* (segments of activities) of the event log.
|
193 |
+
* :meth:`pm4py.stats.get_case_arrival_average`; gets the *average case arrival rate* from the event log.
|
194 |
+
* :meth:`pm4py.stats.get_cycle_time`; gets the *cycle time* from the event log.
|
195 |
+
* :meth:`pm4py.stats.get_all_case_durations`; gets the list of *case durations* for the cases of the event log.
|
196 |
+
* :meth:`pm4py.stats.get_case_duration`; gets the *case duration* of a specific case in the log.
|
197 |
+
* :meth:`pm4py.stats.get_stochastic_language`; gets the *stochastic language* of an event log or a process model.
|
198 |
+
* :meth:`pm4py.stats.get_service_time`; gets the average *service time* per activity.
|
199 |
+
|
200 |
+
|
201 |
+
Filtering (:mod:`pm4py.filtering`)
|
202 |
+
------------------------------------------
|
203 |
+
Filtering is the restriction of the event log to a subset of the behavior.
|
204 |
+
Different methods are offered in pm4py for traditional event logs (.xes, .csv), including:
|
205 |
+
|
206 |
+
* :meth:`pm4py.filtering.filter_start_activities`; filters the *start activities* of the event log.
|
207 |
+
* :meth:`pm4py.filtering.filter_end_activities`; filters the *end activities* of the event log.
|
208 |
+
* :meth:`pm4py.filtering.filter_event_attribute_values`; filters the values of an *attribute at the event level* of the event log.
|
209 |
+
* :meth:`pm4py.filtering.filter_trace_attribute_values`; filters the values of an *attribute at the trace level* of the event log.
|
210 |
+
* :meth:`pm4py.filtering.filter_variants`; filters the *variants* of an event log.
|
211 |
+
* :meth:`pm4py.filtering.filter_directly_follows_relation`; filters the *DF-relations* of an event log.
|
212 |
+
* :meth:`pm4py.filtering.filter_eventually_follows_relation`; filters the *EF-relations* of an event log.
|
213 |
+
* :meth:`pm4py.filtering.filter_time_range`; filters an event log on a temporal interval.
|
214 |
+
* :meth:`pm4py.filtering.filter_between`; filters an event log between a given couple of activities.
|
215 |
+
* :meth:`pm4py.filtering.filter_case_size`; filters an event log on the size of the cases.
|
216 |
+
* :meth:`pm4py.filtering.filter_case_performance`; filters an event log on the throughput time of the cases.
|
217 |
+
* :meth:`pm4py.filtering.filter_activities_rework`; filters an event log by looking at the cases where a given activity is executed different times.
|
218 |
+
* :meth:`pm4py.filtering.filter_paths_performance`; filters an event log by looking at the performance of the paths between two activities.
|
219 |
+
* :meth:`pm4py.filtering.filter_variants_top_k`; filters an event log keeping the top-K variants.
|
220 |
+
* :meth:`pm4py.filtering.filter_variants_by_coverage_percentage`; filters an event log keeping the variants covering the specified percentage of cases.
|
221 |
+
* :meth:`pm4py.filtering.filter_prefixes`; filters the prefixes of an activity.
|
222 |
+
* :meth:`pm4py.filtering.filter_suffixes`; filters the suffixes of an activity.
|
223 |
+
* :meth:`pm4py.filtering.filter_trace_segments`; filters on the given traces (segments of activities).
|
224 |
+
* :meth:`pm4py.filtering.filter_four_eyes_principle`; apply the *Four-Eyes principle* on the event log (LTL).
|
225 |
+
* :meth:`pm4py.filtering.filter_activity_done_different_resources`; filters the cases where an activity is repeated by different resources (LTL).
|
226 |
+
|
227 |
+
Also, some filtering techniques are offered on top of object-centric event logs:
|
228 |
+
|
229 |
+
* :meth:`pm4py.filtering.filter_ocel_event_attribute`; filters the events of an object-centric event log having a given value for an attribute.
|
230 |
+
* :meth:`pm4py.filtering.filter_ocel_object_attribute`; filters the objects of an object-centric event log having a given value for an attribute.
|
231 |
+
* :meth:`pm4py.filtering.filter_ocel_object_types_allowed_activities`; filters the relations between events (activities) and objects (object types) in an object-centric event log.
|
232 |
+
* :meth:`pm4py.filtering.filter_ocel_object_per_type_count`; filters the objects of an object-centric event log having at least the specific amount of objects per object type.
|
233 |
+
* :meth:`pm4py.filtering.filter_ocel_start_events_per_object_type`; filters the events of an object-centric event log that start the lifecycle of an object of a given object type.
|
234 |
+
* :meth:`pm4py.filtering.filter_ocel_end_events_per_object_type`; filters the events of an object-centric event log that end the lifecycle of an object of a given object type.
|
235 |
+
* :meth:`pm4py.filtering.filter_ocel_events_timestamp`; filters the events of an object-centric event log based on a timestamp range.
|
236 |
+
* :meth:`pm4py.filtering.filter_ocel_object_types`; filters a specified collection of object types from the object-centric event log.
|
237 |
+
* :meth:`pm4py.filtering.filter_ocel_events`; filters a specified collection of event identifiers from the object-centric event log.
|
238 |
+
* :meth:`pm4py.filtering.filter_ocel_objects`; filters a specified collection of object identifiers from the object-centric event log.
|
239 |
+
* :meth:`pm4py.filtering.filter_ocel_cc_object`; filters a connected component from the object-centric event log to which the object with the provided identifier belongs.
|
240 |
+
* :meth:`pm4py.filtering.filter_ocel_cc_length`; filter the connected components from an object-centric event log having a number of objects falling in a provided range.
|
241 |
+
* :meth:`pm4py.filtering.filter_ocel_cc_otype`; filter the connected components from an object-centric event log having at least an object of the specified object type.
|
242 |
+
* :meth:`pm4py.filtering.filter_ocel_cc_activity`; filter the connected components from an object-centric event log having at least an event with the specified activity.
|
243 |
+
|
244 |
+
Machine Learning (:mod:`pm4py.ml`)
|
245 |
+
------------------------------------------
|
246 |
+
PM4Py offers some features useful for the application of machine learning techniques.
|
247 |
+
Among those:
|
248 |
+
|
249 |
+
* :meth:`pm4py.ml.split_train_test`; splits an event log into a *training event log* (default 80% of the cases) and a *test event log* (default 20% of the cases).
|
250 |
+
* :meth:`pm4py.ml.get_prefixes_from_log`; gets fixed-length prefixes for the cases of an event log.
|
251 |
+
* :meth:`pm4py.ml.extract_features_dataframe`; extracts machine learning features from an event log.
|
252 |
+
* :meth:`pm4py.ml.extract_ocel_features`; extracts machine learning features from an object-centric event log.
|
253 |
+
* :meth:`pm4py.ml.extract_temporal_features_dataframe`; extracts temporal features from an event log.
|
254 |
+
* :meth:`pm4py.ml.extract_target_vector`; extracts from a log object the target vector for a specific ML use case.
|
255 |
+
* :meth:`pm4py.ml.extract_outcome_enriched_dataframe`; inserts additional columns in the dataframe which are computed on the overall case, so they model the outcome of the case.
|
256 |
+
|
257 |
+
|
258 |
+
Simulation (:mod:`pm4py.sim`)
|
259 |
+
------------------------------------------
|
260 |
+
We offer different simulation algorithms, that starting from a model, are able to produce an output that follows the model and the different rules that have been provided by the user.
|
261 |
+
Among those:
|
262 |
+
|
263 |
+
* :meth:`pm4py.sim.play_out`; performs the play-out of a process model to obtain an event log.
|
264 |
+
* :meth:`pm4py.sim.generate_process_tree`; generates a process tree with the desired number of nodes.
|
265 |
+
|
266 |
+
|
267 |
+
Object-Centric Process Mining (:mod:`pm4py.ocel`)
|
268 |
+
--------------------------------------------------
|
269 |
+
Traditional event logs, used by mainstream process mining techniques, require the events to be related to a case. A case is a set of events for a particular purpose. A case notion is a criteria to assign a case to the events.
|
270 |
+
|
271 |
+
However, in real processes this leads to two problems:
|
272 |
+
|
273 |
+
* If we consider the Order-to-Cash process, an order could be related to many different deliveries. If we consider the delivery as case notion, the same event of Create Order needs to be replicated in different cases (all the deliveries involving the order). This is called the convergence problem.
|
274 |
+
* If we consider the Order-to-Cash process, an order could contain different order items, each one with a different lifecycle. If we consider the order as case notion, several instances of the activities for the single items may be contained in the case, and this makes the frequency/performance annotation of the process problematic. This is called the divergence problem.
|
275 |
+
|
276 |
+
Object-centric event logs relax the assumption that an event is related to exactly one case. Indeed, an event can be related to different objects of different object types.
|
277 |
+
|
278 |
+
Essentially, we can describe the different components of an object-centric event log as:
|
279 |
+
|
280 |
+
* Events, having an identifier, an activity, a timestamp, a list of related objects and a dictionary of other attributes.
|
281 |
+
* Objects, having an identifier, a type and a dictionary of other attributes.
|
282 |
+
* Attribute names, e.g., the possible keys for the attributes of the event/object attribute map.
|
283 |
+
* Object types, e.g., the possible types for the objects.
|
284 |
+
|
285 |
+
In PM4Py, we offer object-centric process mining features:
|
286 |
+
|
287 |
+
* :meth:`pm4py.ocel.ocel_get_object_types`; gets the object types from an object-centric event log.
|
288 |
+
* :meth:`pm4py.ocel.ocel_get_attribute_names`; gets the attribute names from an object-centric event log.
|
289 |
+
* :meth:`pm4py.ocel.ocel_flattening`; flattens object-centric event log with the selection of an object type.
|
290 |
+
* :meth:`pm4py.ocel.ocel_object_type_activities`; gets the activities related to an object type in an object-centric event log.
|
291 |
+
* :meth:`pm4py.ocel.ocel_objects_ot_count`; counts the objects for an object type.
|
292 |
+
* :meth:`pm4py.ocel.ocel_temporal_summary`; returns the temporal summary from an object-centric event log.
|
293 |
+
* :meth:`pm4py.ocel.ocel_objects_summary`; returns the objects summary from an object-centric event log.
|
294 |
+
* :meth:`pm4py.ocel.ocel_objects_interactions_summary`; returns the objects interactions from an object-centric event log.
|
295 |
+
* :meth:`pm4py.ocel.sample_ocel_objects`; returns a sampled object-centric event log picking a subset of the objects of the original one.
|
296 |
+
* :meth:`pm4py.ocel.sample_ocel_connected_components`; returns a sampled object-centric event log containing the provided number of connected components.
|
297 |
+
* :meth:`pm4py.ocel.ocel_drop_duplicates`; drops relations between events and objects happening at the same time.
|
298 |
+
* :meth:`pm4py.ocel.ocel_merge_duplicates`; merge events in the OCEL which are happening with the same activity at the same timestamp.
|
299 |
+
* :meth:`pm4py.ocel.ocel_o2o_enrichment`; enriches the O2O table of the OCEL with the graph-based relationships.
|
300 |
+
* :meth:`pm4py.ocel.ocel_e2o_lifecycle_enrichment`; enriches the relations table of the OCEL with lifecycle-based information.
|
301 |
+
* :meth:`pm4py.ocel.cluster_equivalent_ocel`; performs a clustering of the objects of an OCEL based on lifecycle/interactions similarity.
|
302 |
+
|
303 |
+
|
304 |
+
Some object-centric process discovery algorithms are also offered:
|
305 |
+
|
306 |
+
* :meth:`pm4py.ocel.discover_ocdfg`; discovers an object-centric directly-follows graph from the object-centric event log.
|
307 |
+
* :meth:`pm4py.ocel.discover_oc_petri_net`; discovers an object-centric Petri net from the object-centric event log.
|
308 |
+
* :meth:`pm4py.ocel.discover_objects_graph`; discovers an object-based graph from the object-centric event log.
|
309 |
+
|
310 |
+
|
311 |
+
LLM Integration (:mod:`pm4py.llm`)
|
312 |
+
------------------------------------------
|
313 |
+
|
314 |
+
The following methods provide just the abstractions of the given objects:
|
315 |
+
|
316 |
+
* :meth:`pm4py.llm.abstract_dfg`; provides the DFG abstraction of a traditional event log
|
317 |
+
* :meth:`pm4py.llm.abstract_variants`; provides the variants abstraction of a traditional event log
|
318 |
+
* :meth:`pm4py.llm.abstract_log_attributes`; provides the abstraction of the attributes/columns of the event log
|
319 |
+
* :meth:`pm4py.llm.abstract_log_features`; provides the abstraction of the machine learning features obtained from an event log
|
320 |
+
* :meth:`pm4py.llm.abstract_case`; provides the abstraction of a case (collection of events)
|
321 |
+
* :meth:`pm4py.llm.abstract_ocel`; provides the abstraction of an object-centric event log (list of events and objects)
|
322 |
+
* :meth:`pm4py.llm.abstract_ocel_ocdfg`; provides the abstraction of an object-centric event log (OC-DFG)
|
323 |
+
* :meth:`pm4py.llm.abstract_ocel_features`; provides the abstraction of an object-centric event log (features for ML)
|
324 |
+
* :meth:`pm4py.llm.abstract_event_stream`; provides an abstraction of the (last) events of the stream related to a traditional event log
|
325 |
+
* :meth:`pm4py.llm.abstract_temporal_profile`; provides the abstraction of a temporal profile model
|
326 |
+
* :meth:`pm4py.llm.abstract_petri_net`; provides the abstraction of a Petri net
|
327 |
+
* :meth:`pm4py.llm.abstract_declare`; provides the abstraction of a DECLARE model
|
328 |
+
* :meth:`pm4py.llm.abstract_log_skeleton`; provides the abstraction of a log skeleton model
|
329 |
+
* :meth:`pm4py.llm.explain_visualization`; explains one of the visualizations provided in pm4py using LVMs.
|
330 |
+
|
331 |
+
The following methods can be executed directly against the LLM APIs:
|
332 |
+
|
333 |
+
* :meth:`pm4py.llm.openai_query`; executes a prompt against OpenAI, returning the response as string
|
334 |
+
|
335 |
+
|
336 |
+
Basic Connectors (:mod:`pm4py.connectors`)
|
337 |
+
------------------------------------------
|
338 |
+
|
339 |
+
We offer some basic connectors to get an event log for some processes:
|
340 |
+
|
341 |
+
* :meth:`pm4py.connectors.extract_log_outlook_mails`; extracts a traditional Pandas dataframe representing the Outlook mails
|
342 |
+
* :meth:`pm4py.connectors.extract_log_outlook_calendar`; extracts a traditional Pandas dataframe representing the Outlook calendar
|
343 |
+
* :meth:`pm4py.connectors.extract_log_windows_events`; extracts a traditional Pandas dataframe containing the Windows events registry
|
344 |
+
* :meth:`pm4py.connectors.extract_log_chrome_history`; extracts a traditional Pandas dataframe containing the Chrome navigation history
|
345 |
+
* :meth:`pm4py.connectors.extract_log_firefox_history`; extracts a traditional Pandas dataframe containing the Firefox navigation history
|
346 |
+
* :meth:`pm4py.connectors.extract_log_github`; extracts a traditional Pandas dataframe of a Github repository (issues management)
|
347 |
+
* :meth:`pm4py.connectors.extract_log_camunda_workflow`; extracts a traditional Pandas dataframe from the database supporting Camunda
|
348 |
+
* :meth:`pm4py.connectors.extract_log_sap_o2c`; extracts a traditional Pandas dataframe from the database supporting SAP (O2C process)
|
349 |
+
* :meth:`pm4py.connectors.extract_log_sap_accounting`; extracts a traditional Pandas dataframe from the database supporting SAP (Accounting process)
|
350 |
+
* :meth:`pm4py.connectors.extract_ocel_outlook_mails`; extracts an object-centric event log representing the Outlook mails
|
351 |
+
* :meth:`pm4py.connectors.extract_ocel_outlook_calendar`; extracts an object-centric event log representing the Outlook calendar
|
352 |
+
* :meth:`pm4py.connectors.extract_ocel_windows_events`; extracts an object-centric event log representing the Windows events
|
353 |
+
* :meth:`pm4py.connectors.extract_ocel_chrome_history`; extracts an object-centric event log representing the Chrome history
|
354 |
+
* :meth:`pm4py.connectors.extract_ocel_firefox_history`; extracts an object-centric event log representing the Firefox history
|
355 |
+
* :meth:`pm4py.connectors.extract_ocel_github`; extracts an object-centric event log of a Github repository (issues management)
|
356 |
+
* :meth:`pm4py.connectors.extract_ocel_camunda_workflow`; extracts an object-centric event log from the database supporting Camunda
|
357 |
+
* :meth:`pm4py.connectors.extract_ocel_sap_o2c`; extracts an object-centric event log from the database supporting SAP (O2C process)
|
358 |
+
* :meth:`pm4py.connectors.extract_ocel_sap_accounting`; extracts an object-centric event log from the database supporting SAP (Accounting process)
|
359 |
+
|
360 |
+
|
361 |
+
Social Network Analysis (:mod:`pm4py.org`)
|
362 |
+
------------------------------------------
|
363 |
+
We offer different algorithms for the analysis of the organizational networks starting from an event log:
|
364 |
+
|
365 |
+
* :meth:`pm4py.org.discover_handover_of_work_network`; calculates the Handover of Work metric from the event log.
|
366 |
+
* :meth:`pm4py.org.discover_working_together_network`; calculates the Working Together metric from the event log.
|
367 |
+
* :meth:`pm4py.org.discover_activity_based_resource_similarity`; calculates the activity-based resource similarity.
|
368 |
+
* :meth:`pm4py.org.discover_subcontracting_network`; calculates the Subcontracting metric from the event log.
|
369 |
+
* :meth:`pm4py.org.discover_organizational_roles`; discovers the organizational roles from the event log.
|
370 |
+
* :meth:`pm4py.org.discover_network_analysis`; discovers the network analysis from the event log.
|
371 |
+
|
372 |
+
|
373 |
+
Privacy (:mod:`pm4py.privacy`)
|
374 |
+
------------------------------------------
|
375 |
+
We offer the following algorithms for the anonymization of event logs:
|
376 |
+
|
377 |
+
* :meth:`pm4py.privacy.anonymize_differential_privacy`; PRIPEL (Privacy-preserving event log publishing with contextual information) is a framework to publish event logs that fulfill differential privacy.
|
378 |
+
|
379 |
+
|
380 |
+
Utilities (:mod:`pm4py.utils`)
|
381 |
+
------------------------------------------
|
382 |
+
|
383 |
+
Other algorithms, which do not belong to the aforementioned categories, are collected in this section:
|
384 |
+
|
385 |
+
* :meth:`pm4py.utils.format_dataframe`; ensure the correct formatting of the Pandas dataframe.
|
386 |
+
* :meth:`pm4py.utils.parse_process_tree`; parses a process tree from a string.
|
387 |
+
* :meth:`pm4py.utils.parse_powl_model_string`; parses a POWL model from a string.
|
388 |
+
* :meth:`pm4py.utils.parse_event_log_string`; parses an event log from a collection of comma-separated traces.
|
389 |
+
* :meth:`pm4py.utils.project_on_event_attribute`; projects an event log on top of a given attribute (e.g., the activity), obtaining a list of list of values for the attribute.
|
390 |
+
* :meth:`pm4py.utils.sample_cases`; samples a traditional event log returning the specified amount of cases.
|
391 |
+
* :meth:`pm4py.utils.sample_events`; samples a traditional event log / OCEL returning the specified amount of events.
|
392 |
+
* :meth:`pm4py.utils.serialize`; serializes mainstream pm4py objects as strings.
|
393 |
+
* :meth:`pm4py.utils.deserialize`; de-serializes mainstream pm4py objects given their string representation.
|
394 |
+
* :meth:`pm4py.analysis.cluster_log`; cluster a log into sublogs using the provided clusterer.
|
395 |
+
* :meth:`pm4py.analysis.insert_case_service_waiting_time`; inserts for each case the service and waiting time.
|
396 |
+
* :meth:`pm4py.analysis.insert_case_arrival_finish_rate`; inserts the case arrival/finish rate.
|
397 |
+
* :meth:`pm4py.analysis.insert_artificial_start_end`; inserts artificial start/end activities in the event log.
|
398 |
+
* :meth:`pm4py.analysis.compute_emd`; computes the Earth-Mover Distance between two languages.
|
399 |
+
* :meth:`pm4py.analysis.check_is_workflow_net`; check if a Petri net is a workflow net.
|
400 |
+
* :meth:`pm4py.analysis.check_soundness`; checks if a Petri net is a sound workflow net (Woflan).
|
401 |
+
* :meth:`pm4py.analysis.solve_marking_equation`; solves the marking equation.
|
402 |
+
* :meth:`pm4py.analysis.maximal_decomposition`; performs the maximal decomposition of the given Petri net.
|
403 |
+
* :meth:`pm4py.analysis.generate_marking`; generates a Marking object from a textual representation.
|
404 |
+
* :meth:`pm4py.analysis.reduce_petri_net_invisibles`; reduces the invisible transitions of a Petri net when possible.
|
405 |
+
* :meth:`pm4py.analysis.reduce_petri_net_implicit_places`; reduces the implicit places in the Petri net (MURATA).
|
406 |
+
* :meth:`pm4py.analysis.get_enabled_transitions`; gets the transitions enabled in a given marking.
|
407 |
+
|
408 |
+
|
409 |
+
Overall List of Methods
|
410 |
+
------------------------------------------
|
411 |
+
|
412 |
+
.. autosummary::
|
413 |
+
:toctree: generated
|
414 |
+
|
415 |
+
pm4py.read
|
416 |
+
pm4py.read.read_bpmn
|
417 |
+
pm4py.read.read_dfg
|
418 |
+
pm4py.read.read_pnml
|
419 |
+
pm4py.read.read_ptml
|
420 |
+
pm4py.read.read_xes
|
421 |
+
pm4py.read.read_ocel_csv
|
422 |
+
pm4py.read.read_ocel_jsonocel
|
423 |
+
pm4py.read.read_ocel_xmlocel
|
424 |
+
pm4py.read.read_ocel_sqlite
|
425 |
+
pm4py.read.read_ocel2_xml
|
426 |
+
pm4py.read.read_ocel2_sqlite
|
427 |
+
pm4py.read.read_ocel2_json
|
428 |
+
pm4py.write
|
429 |
+
pm4py.write.write_bpmn
|
430 |
+
pm4py.write.write_dfg
|
431 |
+
pm4py.write.write_pnml
|
432 |
+
pm4py.write.write_ptml
|
433 |
+
pm4py.write.write_xes
|
434 |
+
pm4py.write.write_ocel_csv
|
435 |
+
pm4py.write.write_ocel_jsonocel
|
436 |
+
pm4py.write.write_ocel_xmlocel
|
437 |
+
pm4py.write.write_ocel_sqlite
|
438 |
+
pm4py.write.write_ocel2_xml
|
439 |
+
pm4py.write.write_ocel2_sqlite
|
440 |
+
pm4py.write.write_ocel2_json
|
441 |
+
pm4py.convert
|
442 |
+
pm4py.convert.convert_to_event_log
|
443 |
+
pm4py.convert.convert_to_event_stream
|
444 |
+
pm4py.convert.convert_to_dataframe
|
445 |
+
pm4py.convert.convert_to_bpmn
|
446 |
+
pm4py.convert.convert_to_petri_net
|
447 |
+
pm4py.convert.convert_to_process_tree
|
448 |
+
pm4py.convert.convert_to_reachability_graph
|
449 |
+
pm4py.convert.convert_log_to_ocel
|
450 |
+
pm4py.convert.convert_log_to_networkx
|
451 |
+
pm4py.convert.convert_ocel_to_networkx
|
452 |
+
pm4py.convert.convert_petri_net_to_networkx
|
453 |
+
pm4py.convert.convert_petri_net_type
|
454 |
+
pm4py.discovery
|
455 |
+
pm4py.discovery.discover_dfg
|
456 |
+
pm4py.discovery.discover_performance_dfg
|
457 |
+
pm4py.discovery.discover_petri_net_alpha
|
458 |
+
pm4py.discovery.discover_petri_net_inductive
|
459 |
+
pm4py.discovery.discover_petri_net_heuristics
|
460 |
+
pm4py.discovery.discover_petri_net_ilp
|
461 |
+
pm4py.discovery.discover_process_tree_inductive
|
462 |
+
pm4py.discovery.discover_heuristics_net
|
463 |
+
pm4py.discovery.derive_minimum_self_distance
|
464 |
+
pm4py.discovery.discover_footprints
|
465 |
+
pm4py.discovery.discover_eventually_follows_graph
|
466 |
+
pm4py.discovery.discover_bpmn_inductive
|
467 |
+
pm4py.discovery.discover_transition_system
|
468 |
+
pm4py.discovery.discover_prefix_tree
|
469 |
+
pm4py.discovery.discover_temporal_profile
|
470 |
+
pm4py.discovery.discover_declare
|
471 |
+
pm4py.discovery.discover_log_skeleton
|
472 |
+
pm4py.discovery.discover_batches
|
473 |
+
pm4py.discovery.discover_powl
|
474 |
+
pm4py.conformance
|
475 |
+
pm4py.conformance.conformance_diagnostics_token_based_replay
|
476 |
+
pm4py.conformance.conformance_diagnostics_alignments
|
477 |
+
pm4py.conformance.conformance_diagnostics_footprints
|
478 |
+
pm4py.conformance.fitness_token_based_replay
|
479 |
+
pm4py.conformance.fitness_alignments
|
480 |
+
pm4py.conformance.fitness_footprints
|
481 |
+
pm4py.conformance.precision_token_based_replay
|
482 |
+
pm4py.conformance.precision_alignments
|
483 |
+
pm4py.conformance.precision_footprints
|
484 |
+
pm4py.conformance.replay_prefix_tbr
|
485 |
+
pm4py.conformance.conformance_temporal_profile
|
486 |
+
pm4py.conformance.conformance_declare
|
487 |
+
pm4py.conformance.conformance_log_skeleton
|
488 |
+
pm4py.vis
|
489 |
+
pm4py.vis.view_petri_net
|
490 |
+
pm4py.vis.save_vis_petri_net
|
491 |
+
pm4py.vis.view_performance_dfg
|
492 |
+
pm4py.vis.save_vis_performance_dfg
|
493 |
+
pm4py.vis.view_dfg
|
494 |
+
pm4py.vis.save_vis_dfg
|
495 |
+
pm4py.vis.view_process_tree
|
496 |
+
pm4py.vis.save_vis_process_tree
|
497 |
+
pm4py.vis.view_bpmn
|
498 |
+
pm4py.vis.save_vis_bpmn
|
499 |
+
pm4py.vis.view_heuristics_net
|
500 |
+
pm4py.vis.save_vis_heuristics_net
|
501 |
+
pm4py.vis.view_dotted_chart
|
502 |
+
pm4py.vis.save_vis_dotted_chart
|
503 |
+
pm4py.vis.view_sna
|
504 |
+
pm4py.vis.save_vis_sna
|
505 |
+
pm4py.vis.view_case_duration_graph
|
506 |
+
pm4py.vis.save_vis_case_duration_graph
|
507 |
+
pm4py.vis.view_events_per_time_graph
|
508 |
+
pm4py.vis.save_vis_events_per_time_graph
|
509 |
+
pm4py.vis.view_performance_spectrum
|
510 |
+
pm4py.vis.save_vis_performance_spectrum
|
511 |
+
pm4py.vis.view_events_distribution_graph
|
512 |
+
pm4py.vis.save_vis_events_distribution_graph
|
513 |
+
pm4py.vis.view_ocdfg
|
514 |
+
pm4py.vis.save_vis_ocdfg
|
515 |
+
pm4py.vis.view_ocpn
|
516 |
+
pm4py.vis.save_vis_ocpn
|
517 |
+
pm4py.vis.view_object_graph
|
518 |
+
pm4py.vis.save_vis_object_graph
|
519 |
+
pm4py.vis.view_network_analysis
|
520 |
+
pm4py.vis.save_vis_network_analysis
|
521 |
+
pm4py.vis.view_transition_system
|
522 |
+
pm4py.vis.save_vis_transition_system
|
523 |
+
pm4py.vis.view_prefix_tree
|
524 |
+
pm4py.vis.save_vis_prefix_tree
|
525 |
+
pm4py.vis.view_alignments
|
526 |
+
pm4py.vis.save_vis_alignments
|
527 |
+
pm4py.vis.view_footprints
|
528 |
+
pm4py.vis.save_vis_footprints
|
529 |
+
pm4py.vis.view_powl
|
530 |
+
pm4py.vis.save_vis_powl
|
531 |
+
pm4py.stats
|
532 |
+
pm4py.stats.get_start_activities
|
533 |
+
pm4py.stats.get_end_activities
|
534 |
+
pm4py.stats.get_event_attributes
|
535 |
+
pm4py.stats.get_trace_attributes
|
536 |
+
pm4py.stats.get_event_attribute_values
|
537 |
+
pm4py.stats.get_trace_attribute_values
|
538 |
+
pm4py.stats.get_variants
|
539 |
+
pm4py.stats.get_variants_as_tuples
|
540 |
+
pm4py.stats.split_by_process_variant
|
541 |
+
pm4py.stats.get_variants_paths_duration
|
542 |
+
pm4py.stats.get_minimum_self_distances
|
543 |
+
pm4py.stats.get_minimum_self_distance_witnesses
|
544 |
+
pm4py.stats.get_case_arrival_average
|
545 |
+
pm4py.stats.get_rework_cases_per_activity
|
546 |
+
pm4py.stats.get_cycle_time
|
547 |
+
pm4py.stats.get_all_case_durations
|
548 |
+
pm4py.stats.get_case_duration
|
549 |
+
pm4py.stats.get_frequent_trace_segments
|
550 |
+
pm4py.stats.get_service_time
|
551 |
+
pm4py.stats.get_activity_position_summary
|
552 |
+
pm4py.stats.get_stochastic_language
|
553 |
+
pm4py.filtering
|
554 |
+
pm4py.filtering.filter_log_relative_occurrence_event_attribute
|
555 |
+
pm4py.filtering.filter_start_activities
|
556 |
+
pm4py.filtering.filter_end_activities
|
557 |
+
pm4py.filtering.filter_event_attribute_values
|
558 |
+
pm4py.filtering.filter_trace_attribute_values
|
559 |
+
pm4py.filtering.filter_variants
|
560 |
+
pm4py.filtering.filter_directly_follows_relation
|
561 |
+
pm4py.filtering.filter_eventually_follows_relation
|
562 |
+
pm4py.filtering.filter_time_range
|
563 |
+
pm4py.filtering.filter_between
|
564 |
+
pm4py.filtering.filter_case_size
|
565 |
+
pm4py.filtering.filter_case_performance
|
566 |
+
pm4py.filtering.filter_activities_rework
|
567 |
+
pm4py.filtering.filter_paths_performance
|
568 |
+
pm4py.filtering.filter_variants_top_k
|
569 |
+
pm4py.filtering.filter_variants_by_coverage_percentage
|
570 |
+
pm4py.filtering.filter_prefixes
|
571 |
+
pm4py.filtering.filter_suffixes
|
572 |
+
pm4py.filtering.filter_trace_segments
|
573 |
+
pm4py.filtering.filter_ocel_event_attribute
|
574 |
+
pm4py.filtering.filter_ocel_object_attribute
|
575 |
+
pm4py.filtering.filter_ocel_object_types_allowed_activities
|
576 |
+
pm4py.filtering.filter_ocel_object_per_type_count
|
577 |
+
pm4py.filtering.filter_ocel_start_events_per_object_type
|
578 |
+
pm4py.filtering.filter_ocel_end_events_per_object_type
|
579 |
+
pm4py.filtering.filter_ocel_events_timestamp
|
580 |
+
pm4py.filtering.filter_four_eyes_principle
|
581 |
+
pm4py.filtering.filter_activity_done_different_resources
|
582 |
+
pm4py.filtering.filter_ocel_object_types
|
583 |
+
pm4py.filtering.filter_ocel_events
|
584 |
+
pm4py.filtering.filter_ocel_objects
|
585 |
+
pm4py.filtering.filter_ocel_cc_object
|
586 |
+
pm4py.filtering.filter_ocel_cc_length
|
587 |
+
pm4py.filtering.filter_ocel_cc_otype
|
588 |
+
pm4py.filtering.filter_ocel_cc_activity
|
589 |
+
pm4py.ml
|
590 |
+
pm4py.ml.split_train_test
|
591 |
+
pm4py.ml.get_prefixes_from_log
|
592 |
+
pm4py.ml.extract_features_dataframe
|
593 |
+
pm4py.ml.extract_temporal_features_dataframe
|
594 |
+
pm4py.ml.extract_target_vector
|
595 |
+
pm4py.ml.extract_outcome_enriched_dataframe
|
596 |
+
pm4py.ml.extract_ocel_features
|
597 |
+
pm4py.sim
|
598 |
+
pm4py.sim.play_out
|
599 |
+
pm4py.sim.generate_process_tree
|
600 |
+
pm4py.ocel
|
601 |
+
pm4py.ocel.ocel_get_object_types
|
602 |
+
pm4py.ocel.ocel_get_attribute_names
|
603 |
+
pm4py.ocel.ocel_flattening
|
604 |
+
pm4py.ocel.ocel_object_type_activities
|
605 |
+
pm4py.ocel.ocel_objects_ot_count
|
606 |
+
pm4py.ocel.discover_ocdfg
|
607 |
+
pm4py.ocel.discover_oc_petri_net
|
608 |
+
pm4py.ocel.ocel_temporal_summary
|
609 |
+
pm4py.ocel.ocel_objects_summary
|
610 |
+
pm4py.ocel.ocel_objects_interactions_summary
|
611 |
+
pm4py.ocel.sample_ocel_objects
|
612 |
+
pm4py.ocel.sample_ocel_connected_components
|
613 |
+
pm4py.ocel.ocel_drop_duplicates
|
614 |
+
pm4py.ocel.ocel_merge_duplicates
|
615 |
+
pm4py.ocel.ocel_o2o_enrichment
|
616 |
+
pm4py.ocel.ocel_e2o_lifecycle_enrichment
|
617 |
+
pm4py.ocel.cluster_equivalent_ocel
|
618 |
+
pm4py.llm
|
619 |
+
pm4py.llm.abstract_dfg
|
620 |
+
pm4py.llm.abstract_variants
|
621 |
+
pm4py.llm.abstract_ocel
|
622 |
+
pm4py.llm.abstract_ocel_ocdfg
|
623 |
+
pm4py.llm.abstract_ocel_features
|
624 |
+
pm4py.llm.abstract_event_stream
|
625 |
+
pm4py.llm.abstract_petri_net
|
626 |
+
pm4py.llm.abstract_log_attributes
|
627 |
+
pm4py.llm.abstract_log_features
|
628 |
+
pm4py.llm.abstract_temporal_profile
|
629 |
+
pm4py.llm.abstract_case
|
630 |
+
pm4py.llm.abstract_declare
|
631 |
+
pm4py.llm.abstract_log_skeleton
|
632 |
+
pm4py.llm.openai_query
|
633 |
+
pm4py.llm.explain_visualization
|
634 |
+
pm4py.connectors.extract_log_outlook_mails
|
635 |
+
pm4py.connectors.extract_log_outlook_calendar
|
636 |
+
pm4py.connectors.extract_log_windows_events
|
637 |
+
pm4py.connectors.extract_log_chrome_history
|
638 |
+
pm4py.connectors.extract_log_firefox_history
|
639 |
+
pm4py.connectors.extract_log_github
|
640 |
+
pm4py.connectors.extract_log_camunda_workflow
|
641 |
+
pm4py.connectors.extract_log_sap_o2c
|
642 |
+
pm4py.connectors.extract_log_sap_accounting
|
643 |
+
pm4py.connectors.extract_ocel_outlook_mails
|
644 |
+
pm4py.connectors.extract_ocel_outlook_calendar
|
645 |
+
pm4py.connectors.extract_ocel_windows_events
|
646 |
+
pm4py.connectors.extract_ocel_chrome_history
|
647 |
+
pm4py.connectors.extract_ocel_firefox_history
|
648 |
+
pm4py.connectors.extract_ocel_github
|
649 |
+
pm4py.connectors.extract_ocel_camunda_workflow
|
650 |
+
pm4py.connectors.extract_ocel_sap_o2c
|
651 |
+
pm4py.connectors.extract_ocel_sap_accounting
|
652 |
+
pm4py.org
|
653 |
+
pm4py.org.discover_handover_of_work_network
|
654 |
+
pm4py.org.discover_working_together_network
|
655 |
+
pm4py.org.discover_activity_based_resource_similarity
|
656 |
+
pm4py.org.discover_subcontracting_network
|
657 |
+
pm4py.org.discover_organizational_roles
|
658 |
+
pm4py.org.discover_network_analysis
|
659 |
+
pm4py.analysis
|
660 |
+
pm4py.analysis.cluster_log
|
661 |
+
pm4py.analysis.insert_case_service_waiting_time
|
662 |
+
pm4py.analysis.insert_case_arrival_finish_rate
|
663 |
+
pm4py.analysis.solve_marking_equation
|
664 |
+
pm4py.analysis.check_soundness
|
665 |
+
pm4py.analysis.insert_artificial_start_end
|
666 |
+
pm4py.analysis.check_is_workflow_net
|
667 |
+
pm4py.analysis.maximal_decomposition
|
668 |
+
pm4py.analysis.generate_marking
|
669 |
+
pm4py.analysis.compute_emd
|
670 |
+
pm4py.analysis.reduce_petri_net_invisibles
|
671 |
+
pm4py.analysis.reduce_petri_net_implicit_places
|
672 |
+
pm4py.analysis.get_enabled_transitions
|
673 |
+
pm4py.utils
|
674 |
+
pm4py.utils.rebase
|
675 |
+
pm4py.utils.parse_process_tree
|
676 |
+
pm4py.utils.parse_powl_model_string
|
677 |
+
pm4py.utils.format_dataframe
|
678 |
+
pm4py.utils.serialize
|
679 |
+
pm4py.utils.deserialize
|
680 |
+
pm4py.utils.parse_event_log_string
|
681 |
+
pm4py.utils.project_on_event_attribute
|
682 |
+
pm4py.utils.sample_cases
|
683 |
+
pm4py.utils.sample_events
|
pm4py/docs/source/conf.py
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
#
|
3 |
+
# Configuration file for the Sphinx documentation builder.
|
4 |
+
#
|
5 |
+
# This file does only contain a selection of the most common options. For a
|
6 |
+
# full list see the documentation:
|
7 |
+
# http://www.sphinx-doc.org/en/master/config
|
8 |
+
|
9 |
+
# -- Path setup --------------------------------------------------------------
|
10 |
+
|
11 |
+
# If extensions (or modules to document with autodoc) are in another directory,
|
12 |
+
# add these directories to sys.path here. If the directory is relative to the
|
13 |
+
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
14 |
+
#
|
15 |
+
import os
|
16 |
+
import sys
|
17 |
+
|
18 |
+
sys.path.insert(0, os.path.abspath('../../'))
|
19 |
+
|
20 |
+
# -- Project information -----------------------------------------------------
|
21 |
+
|
22 |
+
project = 'pm4py'
|
23 |
+
project_copyright = 'Process Intelligence Solutions'
|
24 |
+
author = 'Process Intelligence Solutions'
|
25 |
+
|
26 |
+
# The short X.Y version
|
27 |
+
version = '2.7'
|
28 |
+
# The full version, including alpha/beta/rc tags
|
29 |
+
release = '2.7.10'
|
30 |
+
|
31 |
+
# -- General configuration ---------------------------------------------------
|
32 |
+
|
33 |
+
# If your documentation needs a minimal Sphinx version, state it here.
|
34 |
+
#
|
35 |
+
# needs_sphinx = '1.0'
|
36 |
+
|
37 |
+
# Add any Sphinx extension module names here, as strings. They can be
|
38 |
+
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
39 |
+
# ones.
|
40 |
+
extensions = [
|
41 |
+
'sphinx.ext.autodoc',
|
42 |
+
'sphinx_autodoc_annotation',
|
43 |
+
'sphinx.ext.viewcode',
|
44 |
+
'sphinx.ext.autosummary',
|
45 |
+
]
|
46 |
+
|
47 |
+
autodoc_mock_imports = ["cvxopt"]
|
48 |
+
napoleon_google_docstring = False
|
49 |
+
napoleon_use_param = False
|
50 |
+
napoleon_use_ivar = True
|
51 |
+
autodoc_member_order = 'bysource'
|
52 |
+
|
53 |
+
# Add any paths that contain templates here, relative to this directory.
|
54 |
+
templates_path = ['_templates']
|
55 |
+
|
56 |
+
# The suffix(es) of source filenames.
|
57 |
+
# You can specify multiple suffix as a list of string:
|
58 |
+
#
|
59 |
+
# source_suffix = ['.rst', '.md']
|
60 |
+
source_suffix = '.rst'
|
61 |
+
|
62 |
+
# The master toctree document.
|
63 |
+
master_doc = 'index'
|
64 |
+
|
65 |
+
# The language for content autogenerated by Sphinx. Refer to documentation
|
66 |
+
# for a list of supported languages.
|
67 |
+
#
|
68 |
+
# This is also used if you do content translation via gettext catalogs.
|
69 |
+
# Usually you set "language" from the command line for these traces.
|
70 |
+
language = None
|
71 |
+
|
72 |
+
# List of patterns, relative to source directory, that match files and
|
73 |
+
# directories to ignore when looking for source files.
|
74 |
+
# This pattern also affects html_static_path and html_extra_path .
|
75 |
+
exclude_patterns = ['tests', 'scripts']
|
76 |
+
|
77 |
+
# The name of the Pygments (syntax highlighting) style to use.
|
78 |
+
pygments_style = 'sphinx'
|
79 |
+
|
80 |
+
# -- Options for HTML output -------------------------------------------------
|
81 |
+
|
82 |
+
# The theme to use for HTML and HTML Help pages. See the documentation for
|
83 |
+
# a list of builtin themes.
|
84 |
+
#
|
85 |
+
html_theme = 'pydata_sphinx_theme'
|
86 |
+
html_logo = 'pm4py-logo.png'
|
87 |
+
|
88 |
+
# Theme options are theme-specific and customize the look and feel of a theme
|
89 |
+
# further. For a list of options available for each theme, see the
|
90 |
+
# documentation.
|
91 |
+
#
|
92 |
+
html_theme_options = {
|
93 |
+
"logo": {
|
94 |
+
"image_light": "pm4py-logo.png",
|
95 |
+
"image_dark": "pm4py-logo.png",
|
96 |
+
},
|
97 |
+
"navbar_persistent": []
|
98 |
+
}
|
99 |
+
|
100 |
+
# Add any paths that contain custom static files (such as style sheets) here,
|
101 |
+
# relative to this directory. They are copied after the builtin static files,
|
102 |
+
# so a file named "default.css" will overwrite the builtin "default.css".
|
103 |
+
html_static_path = ['_static']
|
104 |
+
|
105 |
+
html_css_files = ["css/custom.css"]
|
106 |
+
|
107 |
+
# Custom sidebar templates, must be a dictionary that maps document names
|
108 |
+
# to template names.
|
109 |
+
#
|
110 |
+
# The default sidebars (for documents that don't match any pattern) are
|
111 |
+
# defined by theme itself. Builtin themes are using these templates by
|
112 |
+
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
|
113 |
+
# 'searchbox.html']``.
|
114 |
+
#
|
115 |
+
# html_sidebars = {}
|
116 |
+
|
117 |
+
|
118 |
+
# -- Options for HTMLHelp output ---------------------------------------------
|
119 |
+
|
120 |
+
# Output file base name for HTML help builder.
|
121 |
+
htmlhelp_basename = 'pm4pydoc'
|
122 |
+
|
123 |
+
# -- Options for LaTeX output ------------------------------------------------
|
124 |
+
|
125 |
+
latex_elements = {
|
126 |
+
# The paper size ('letterpaper' or 'a4paper').
|
127 |
+
#
|
128 |
+
# 'papersize': 'letterpaper',
|
129 |
+
|
130 |
+
# The font size ('10pt', '11pt' or '12pt').
|
131 |
+
#
|
132 |
+
# 'pointsize': '10pt',
|
133 |
+
|
134 |
+
# Additional stuff for the LaTeX preamble.
|
135 |
+
#
|
136 |
+
# 'preamble': '',
|
137 |
+
|
138 |
+
# Latex figure (float) alignment
|
139 |
+
#
|
140 |
+
# 'figure_align': 'htbp',
|
141 |
+
}
|
142 |
+
|
143 |
+
# Grouping the document tree into LaTeX files. List of tuples
|
144 |
+
# (source start file, target name, title,
|
145 |
+
# author, documentclass [howto, manual, or own class]).
|
146 |
+
latex_documents = [
|
147 |
+
(master_doc, 'pm4py.tex', 'pm4py API reference',
|
148 |
+
'pm4py @ Process Intelligence Solutions', 'manual'),
|
149 |
+
]
|
150 |
+
|
151 |
+
# -- Options for manual page output ------------------------------------------
|
152 |
+
|
153 |
+
# One entry per manual page. List of tuples
|
154 |
+
# (source start file, name, description, authors, manual section).
|
155 |
+
man_pages = [
|
156 |
+
(master_doc, 'pm4py', 'pm4py API reference',
|
157 |
+
[author], 1)
|
158 |
+
]
|
159 |
+
|
160 |
+
# -- Options for Texinfo output ----------------------------------------------
|
161 |
+
|
162 |
+
# Grouping the document tree into Texinfo files. List of tuples
|
163 |
+
# (source start file, target name, title, author,
|
164 |
+
# dir menu entry, description, category)
|
165 |
+
texinfo_documents = [
|
166 |
+
(master_doc, 'pm4py', 'pm4py API reference',
|
167 |
+
author, 'pm4py', 'Process Mining for Python API reference.',
|
168 |
+
'Miscellaneous'),
|
169 |
+
]
|
170 |
+
|
171 |
+
# -- Extension configuration -------------------------------------------------
|
pm4py/docs/source/examples.rst
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Examples
|
2 |
+
============
|
3 |
+
|
4 |
+
Filtering XYZ
|
5 |
+
-------------
|
6 |
+
|
7 |
+
Decision Point Analysis
|
8 |
+
-----------------------
|
9 |
+
|
10 |
+
Computing a DFG with Performance Overlay
|
11 |
+
----------------------------------------
|
pm4py/docs/source/getting_started.rst
ADDED
@@ -0,0 +1,410 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Getting Started
|
2 |
+
===============
|
3 |
+
|
4 |
+
Understanding Process Mining
|
5 |
+
----------------------------
|
6 |
+
|
7 |
+
.. raw:: html
|
8 |
+
|
9 |
+
<!--<iframe width="560" height="315" src="https://www.youtube.com/embed/XLHtvt36g6U" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>-->
|
10 |
+
<a href="https://www.youtube.com/embed/XLHtvt36g6U" target="_blank" rel="noopener noreferrer">→ Watch on YouTube: pm4py tutorials - tutorial #1 What is Process Mining?</a>
|
11 |
+
|
12 |
+
|
13 |
+
In this section, we explain what process mining is all about.
|
14 |
+
Note that this page describes the basics of process mining, i.e., it is not a full-fledged reference of every possible aspect of process mining.
|
15 |
+
Therefore, for a more detailed overview of process mining, we recommend looking at the `Coursera MOOC on Process Mining <https://www.coursera.org/learn/process-mining>`_ and the `seminal book of Wil van der Aalst <https://www.springer.com/gp/book/9783662498507>`_.
|
16 |
+
Furthermore, before you begin, please install PM4Py on your system, i.e., as described in the :doc:`install` section.
|
17 |
+
|
18 |
+
Processes in our Modern World
|
19 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
20 |
+
The vast majority of companies active in virtually any domain execute a process.
|
21 |
+
Whether the core business of a company is to deliver a product, e.g., manufacture a car, cook a delicious pizza, etc., or provide a service, e.g., providing you with a mortgage to buy your dream house, paying back your insurance claim, etc., for efficient delivery of your product/service, processes are executed.
|
22 |
+
Hence, a natural question is: “What is a process?”.
|
23 |
+
In general, several notions of the concept of a process exist.
|
24 |
+
However, in process mining, we typically assume the following conceptual definition:
|
25 |
+
|
26 |
+
"A **process** represents a **collection of activities** that we **execute** to achieve a **certain goal**."
|
27 |
+
|
28 |
+
For example, consider the burger restaurant just around the corner, which also delivers burgers.
|
29 |
+
When you call the restaurant to order your beloved burger, the first action taken by the employee, let’s call her **Lucy**, taking your call, is to *take your order*.
|
30 |
+
Let’s assume you go for a tasty cheeseburger with a can of soda.
|
31 |
+
After Lucy has *entered your order in the cash register*, she *asks for your address*, which she adds to the order.
|
32 |
+
Finally, she *asks for your preferred means of payment*, after which she provides you with a rough estimate of the time until delivery.
|
33 |
+
When Lucy finishes the call, she *prints your order* and hands it over to the chef, let’s call him **Luigi**.
|
34 |
+
Since you’ve called relatively early, Luigi can start *preparing your burger* right away.
|
35 |
+
At the same time, Lucy *takes a can of soda out of the refrigerator* and places it on the counter.
|
36 |
+
A new call comes in from a different customer, which she handles roughly the same way as yours.
|
37 |
+
When Luigi *finishes your burger*, he *slides it into a carton box* and hands the box over to Lucy.
|
38 |
+
Lucy *wraps the order* in a bag.
|
39 |
+
She then hands the bag with your burger and soda to **Mike**, which uses a fancy electrical bicycle to *bring your order to your home*.
|
40 |
+
|
41 |
+
In this small example, let’s assume that we are interested in the process, i.e., the collection of activities performed for your order.
|
42 |
+
Based on the scenario we just presented, the steps look as follows:
|
43 |
+
|
44 |
+
|
45 |
+
1. **Lucy** *takes your order*
|
46 |
+
#. **Lucy** *notes down your address*
|
47 |
+
#. **Lucy** *notes down your preferred payment method*
|
48 |
+
#. **Luigi** *prepares your burger*
|
49 |
+
#. **Lucy** *grabs your can of soda*
|
50 |
+
#. **Luigi** *puts your burger in a box*
|
51 |
+
#. **Lucy** *wraps your order*
|
52 |
+
#. **Mike** *delivers your order*
|
53 |
+
|
54 |
+
Importing Your First Event Log
|
55 |
+
------------------------------
|
56 |
+
In this section, we explain how to import (and export) event data in PM4Py. We assume that you are familiar with the conceptual basics of process mining, i.e., as described in the previous section.
|
57 |
+
|
58 |
+
File Types: CSV and XES
|
59 |
+
~~~~~~~~~~~~~~~~~~~~~~~~
|
60 |
+
As explained in the previous section, process mining exploits Event Logs to generate knowledge of a process. A wide variety of information systems, e.g., SAP, ORACLE, SalesForce, etc., allow us to extract, in one way or the other, event logs similar to the example event log presented in Table 1 and Table 2. All the examples we show in this section and all algorithms implemented in pm4py assume that we have already extracted the event data into an appropriate event log format. Hence, the core of pm4py does not support any data extraction features. However, we provide solutions for data extraction purposes, i.e., please inspect the corresponding `solutions page </solution-connectors>`_.
|
61 |
+
|
62 |
+
In order to support interoperability between different process mining tools and libraries, two standard data formats are used to capture event logs, i.e., Comma Separated Value (CSV) files and eXtensible Event Stream (XES) files. CSV files resemble the example tables shown in the previous section, i.e., Table 1 and Table 2. Each line in such a file describes an event that occurred. The columns represent the same type of data, as shown in the examples, e.g., the case for which the event occurred, the activity, the timestamp, the resource executing the activity, etc. The XES file format is an XML-based format that allows us to describe process behavior. We will not go into details w.r.t. the format of XES files, i.e., we refer to `https://www.xes-standard.org <https://www.xes-standard.org>`_ for an overview.
|
63 |
+
|
64 |
+
In the remainder of this tutorial, we will use a commonly used dummy example event log to explain the basic process mining operations. The process that we are considering is a simplified process related to customer complaint handling, i.e., *taken from the book of van der Aalst*. The process, and the event data we are going to use, looks as follows.
|
65 |
+
|
66 |
+
.. image:: https://pm4py.fit.fraunhofer.de/static/assets/images/getting_started/bpmn_running_example.png
|
67 |
+
|
68 |
+
*Figure 3: Running example BPMN-based process model describing the behavior of the simple process that we use in this tutorial.*
|
69 |
+
|
70 |
+
Let’s get started! We have prepared a small sample event log, containing behavior similar to the process model in Figure 3. `You can find the sample event log here </static/assets/data/getting_started/running-example.csv>`_. Please download the file and store it somewhere on your computer, e.g., your Downloads folder (On Windows: this is 'C:/Users/user_name/Downloads'). Consider Figure 4, in which we depict the first 25 rows of the example file.
|
71 |
+
|
72 |
+
.. image:: https://pm4py.fit.fraunhofer.de/static/assets/images/getting_started/csv_snapshot.png
|
73 |
+
|
74 |
+
*Figure 4: Running example csv data set which we will use in this tutorial.*
|
75 |
+
|
76 |
+
Note that, the data depicted in Figure 4 describes a table, however, in text format. Each line in the file corresponds to a row in the table. Whenever we encounter a ‘;’ symbol on a line, this implies that we are ‘entering’ the next column. The first line (i.e., row) specifies the name of each column. Observe that, in the data table described by the file, we have 5 columns, being: *case_id*, *activity*, *timestamp*, *costs* and *resource*. Observe that, similar to our previous example, the first column represents the case identifier, i.e., allowing us to identify what activity has been logged in the context of what instance of the process. The second column shows the activity that has been performed. The third column shows at what point in time the activity was recorded. In this example data, additional information is present as well. In this case, the fourth column tracks the costs of the activity, whereas the fifth column tracks what resource has performed the activity.
|
77 |
+
|
78 |
+
Before we go into loading the example file into PM4Py, let us briefly take a look at the data. Observe that, lines 2-10 show the events that have been recorded for the process identified by case identifier 3. We observe that first a register request activity was performed, followed by the examine casually, check ticket, decide, reinitiate request, examine thoroughly, check ticket, decide, and finally, pay compensation activities. Note that, indeed, in this case the recorded process instance behaves as described by the model depicted in Figure 3.
|
79 |
+
|
80 |
+
Loading CSV Files
|
81 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
82 |
+
|
83 |
+
.. raw:: html
|
84 |
+
|
85 |
+
<!--<iframe width="560" height="315" src="https://www.youtube.com/embed/bWOKVx0PO6g" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>-->
|
86 |
+
<a href="https://www.youtube.com/embed/bWOKVx0PO6g" target="_blank" rel="noopener noreferrer">→ Watch on YouTube: pm4py tutorials - tutorial #2 Importing CSV Files</a>
|
87 |
+
|
88 |
+
|
89 |
+
Given that we have familiarized ourselves with event logs and a way to represent event logs in a CSV file, it is time to start doing some process mining! We are going to load the event data, and, we are going to count how many cases are present in the event log, as well as the number of events. Note that, for all this, we are effectively using a third-party library called `pandas <https://pandas.pydata.org>`_. We do so because pandas is the de-facto standard of loading/manipulating csv-based data. Hence, *any process mining algorithm implemented in PM4Py, using an event log as an input, can work directly with a pandas file!*
|
90 |
+
|
91 |
+
.. code-block:: python3
|
92 |
+
|
93 |
+
import pandas
|
94 |
+
|
95 |
+
|
96 |
+
def import_csv(file_path):
|
97 |
+
event_log = pandas.read_csv(file_path, sep=';')
|
98 |
+
num_events = len(event_log)
|
99 |
+
num_cases = len(event_log.case_id.unique())
|
100 |
+
print("Number of events: {}\nNumber of cases: {}".format(num_events, num_cases))
|
101 |
+
|
102 |
+
|
103 |
+
if __name__ == "__main__":
|
104 |
+
import_csv("C:/Users/demo/Downloads/running-example.csv")
|
105 |
+
|
106 |
+
*Example 1: Loading an event log stored in a CSV file and computing the number of cases and the number of events in the file. In this example, no PM4Py is used yet, it is all being handled using pandas. If you run the code yourself, make sure to replace the path 'C:/Users/demo/Downloads/running-example.csv', to the appropriate path on your computer containing the running example file.*
|
107 |
+
|
108 |
+
We will quickly go through the above example code. In the first line, we import the pandas library. The last lines (containing the if-statement) make sure that the code, when pasted, runs on its own (we will omit these lines from future examples). The core of the script is the function **import_csv**. As an input parameter, it requires the path to the csv file. The script uses the pandas read_csv-function, to load the event data. To calculate the number of events, we simply query the length of the data frame, i.e., by calling **len(event_log)**. To calculate the number of cases, we use a built-in pandas function to return the number of unique values of the case_id column, i.e., **event_log.case_id.unique()**. Since that function returns a pandas built-in array object containing all the values of the column, we again query for its length. Note that, as is often the case when programming, there is a wide variety of ways to compute the aforementioned example statistics on the basis of a given CSV file.
|
109 |
+
|
110 |
+
Now we have loaded our first event log, it is time to put some PM4Py into the mix. Let us assume that we are not only interested in the number of events and cases, yet, we also want to figure out what activities occur first, and what activities occur last in the traces described by the event log. PM4Py has a specific built-in function for this, i.e., **get_start_activities()** and **get_end_activities()** respectively. Consider Example 2, in which we present the corresponding script.
|
111 |
+
|
112 |
+
.. code-block:: python3
|
113 |
+
|
114 |
+
import pandas
|
115 |
+
import pm4py
|
116 |
+
|
117 |
+
|
118 |
+
def import_csv(file_path):
|
119 |
+
event_log = pandas.read_csv(file_path, sep=';')
|
120 |
+
event_log = pm4py.format_dataframe(event_log, case_id='case_id', activity_key='activity', timestamp_key='timestamp')
|
121 |
+
start_activities = pm4py.get_start_activities(event_log)
|
122 |
+
end_activities = pm4py.get_end_activities(event_log)
|
123 |
+
print("Start activities: {}\nEnd activities: {}".format(start_activities, end_activities))
|
124 |
+
|
125 |
+
if __name__ == "__main__":
|
126 |
+
import_csv("csv_file.csv")
|
127 |
+
|
128 |
+
*Example 2: Loading an event log stored in a CSV file and computing the start and end activities of the traces in the event log. If you run the code yourself, make sure to point the file path to the appropriate path on your computer containing the running example file.*
|
129 |
+
|
130 |
+
Note that, we now import pandas and pm4py. The first line of our script again loads the event log stored in CSV format as a data frame. The second line transforms the event data table into a format that can be used by any process mining algorithm in pm4py. That is, the **format_dataframe()**-function creates a copy of the input event log, and renames the assigned columns to standardized column names used in pm4py. In our example, the column case_id is renamed to case:concept:name, the activity column is renamed to concept:name and the timestamp column is renamed to time:timestamp. The underlying reasons for using the aforementioned standard names is primarily related to XES-based (the other file format that we will look at shortly) legacy. Hence, it is advisable to always import a csv based log as follows.
|
131 |
+
|
132 |
+
Note that, in this example, the value of the arguments, i.e., *sep*, *case_id*, *activity_key* and *timestamp_key* are depending on the input data. To obtain the activities that occur first and, respectively, last in any trace in the event log, we call the pm4py.get_start_activities(event_log) and the pm4py.get_end_activities(event_log) functions. The functions return a dictionary, containing the activities as a key, and, the number of observations (i.e., number of traces in which they occur first, respectively, last) in the event log.
|
133 |
+
|
134 |
+
PM4Py exploits a built-in pandas function to detect the format of the timestamps in the input data automatically. However, pandas looks at the timestamp values in each row in isolation. In some cases, this can lead to problems. For example, if the provided value is 2020-01-18, i.e., first the year, then the month, and then the day of the date, in some cases, a value of 2020-02-01 may be interpreted wrongly as January 2nd, i.e., rather than February 1st. To alleviate this problem, an additional parameter can be provided to the **format_dataframe()** method, i.e., the timest_format parameter. In this example, the timestamp format is %Y-%m-%d %H:%M:%S%z. In general, we advise to specify the timestamp format!
|
135 |
+
|
136 |
+
Loading XES Files
|
137 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
138 |
+
|
139 |
+
.. raw:: html
|
140 |
+
|
141 |
+
<!--<iframe width="560" height="315" src="https://www.youtube.com/embed/pmpN3A_h2sQ" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>-->
|
142 |
+
<a href="https://www.youtube.com/embed/pmpN3A_h2sQ" target="_blank" rel="noopener noreferrer">→ Watch on YouTube: pm4py tutorials - tutorial #3 Importing XES Files</a>
|
143 |
+
|
144 |
+
Next to CSV files, event data can also be stored in an XML-based format, i.e., in XES files. In an XES file, we can describe a containment relation, i.e., a log contains a number of traces, which in turn contain several events. Furthermore, an object, i.e., a log, trace, or event, is allowed to have attributes. The advantage is that certain data attributes that are constant for a log or a trace, can be stored at that level. For example, assume that we only know the total costs of a case, rather than the costs of the individual events. If we want to store this information in a CSV file, we either need to replicate this information (i.e., we can only store data in rows, which directly refer to events), or, we need to explicitly define that certain columns only get a value once, i.e., referring to case-level attributes. The XES standard more naturally supports the storage of this type of information.
|
145 |
+
|
146 |
+
Consider Figure 5, in which we depict a snapshot of the running example data stored in the .xes file format. The complete file can be downloaded
|
147 |
+
`here <https://pm4py.fit.fraunhofer.de/static/assets/data/getting_started/running-example.xes>`_.
|
148 |
+
|
149 |
+
.. image:: https://pm4py.fit.fraunhofer.de/static/assets/images/getting_started/csv_snapshot.png
|
150 |
+
|
151 |
+
*Figure 5: Running example xes data set.*
|
152 |
+
|
153 |
+
Observe that the trace with number 1 (reflected by the [string key=”concept:name”]-tag on line 9) is the first trace recorded in this event log. The first event of the trace represents the “register request” activity executed by Pete. The second event is the “examine thoroughly” activity, executed by Sue, etc. We will not elaborate on the XES standard in detail here, i.e., we refer to the `XES homepage <http://www.xes-standard.org/>`_, and, to our `video tutorial <https://www.youtube.com/watch?v=pmpN3A_h2sQ&t=1785s&ab_channel=ProcessMiningforPython>`_ on importing XES for more information.
|
154 |
+
|
155 |
+
Importing an XES file is fairly straightforward. PM4Py has a special **read_xes()**-function that can parse a given xes file and load it in PM4Py, i.e., as an Event Log object. Consider the following code snippet, in which we show how to import an XES event log. Like the previous example, the script outputs activities that can start and end a trace.
|
156 |
+
|
157 |
+
.. code-block:: python3
|
158 |
+
|
159 |
+
def import_xes(file_path):
|
160 |
+
event_log = pm4py.read_xes(file_path)
|
161 |
+
start_activities = pm4py.get_start_activities(event_log)
|
162 |
+
end_activities = pm4py.get_end_activities(event_log)
|
163 |
+
print("Start activities: {}\nEnd activities: {}".format(start_activities, end_activities))
|
164 |
+
|
165 |
+
if __name__ == "__main__":
|
166 |
+
import_xes("C:/Users/demo/Downloads/running-example.xes")
|
167 |
+
|
168 |
+
Exporting Event Data
|
169 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
170 |
+
|
171 |
+
.. raw:: html
|
172 |
+
|
173 |
+
<!--<iframe width="560" height="315" src="https://www.youtube.com/embed/gVnfG6xLIxI" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>-->
|
174 |
+
<a href="https://www.youtube.com/embed/gVnfG6xLIxI" target="_blank" rel="noopener noreferrer">→ Watch on YouTube: pm4py tutorials - tutorial #6 exporting event data</a>
|
175 |
+
|
176 |
+
|
177 |
+
Now we are able to import event data into PM4Py, let’s take a look at the opposite, i.e., exporting event data. Exporting of event logs can be very useful, e.g., we might want to convert a .csv file into a .xes file or we might want to filter out certain (noisy) cases and save the filtered event log. Like importing, exporting of event data is possible in two ways, i.e., exporting to csv (using pandas) and exporting to xes. In the upcoming sections, we show how to export an event log stored as a pandas data frame into a csv file, a pandas data frame as a xes file, a PM4Py event log object as a csv file and finally, a PM4Py event log object as a xes file.
|
178 |
+
|
179 |
+
Storing a Pandas Data Frame as a csv file
|
180 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
181 |
+
|
182 |
+
Storing an event log that is represented as a pandas dataframe is straightforward, i.e., we can directly use the **to_csv** function of the pandas DataFrame object. Consider the following example snippet of code, in which we show this functionality.
|
183 |
+
|
184 |
+
Note that the example code imports the running example csv file as a pandas data frame, and, exports it to a csv file at the location ‘C:/Users/demo/Desktop/running-example-exported.csv’. Note that, by default, pandas uses a ‘,’-symbol rather than ‘;’-symbol as a column separator.
|
185 |
+
|
186 |
+
.. code-block:: python3
|
187 |
+
|
188 |
+
import pandas as pd
|
189 |
+
|
190 |
+
if __name__ == "__main__":
|
191 |
+
event_log = pm4py.format_dataframe(pd.read_csv('C:/Users/demo/Downloads/running-example.csv', sep=';'), case_id='case_id',
|
192 |
+
activity_key='activity', timestamp_key='timestamp')
|
193 |
+
event_log.to_csv('C:/Users/demo/Desktop/running-example-exported.csv')
|
194 |
+
|
195 |
+
Storing a Pandas Data Frame as a .xes file
|
196 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
197 |
+
|
198 |
+
It is also possible to store a pandas data frame to a xes file. This is simply done by calling the **pm4py.write_xes()** function. You can pass the dataframe as an input parameter to the function, i.e., pm4py handles the internal conversion of the dataframe to an event log object prior to writing it to disk. Note that this construct only works if you have formatted the data frame, i.e., as highlighted earlier in the importing CSV section.
|
199 |
+
|
200 |
+
.. code-block:: python3
|
201 |
+
|
202 |
+
import pandas
|
203 |
+
import pm4py
|
204 |
+
|
205 |
+
if __name__ == "__main__":
|
206 |
+
event_log = pm4py.format_dataframe(pandas.read_csv('C:/Users/demo/Downloads/running-example.csv', sep=';'), case_id='case_id',
|
207 |
+
activity_key='activity', timestamp_key='timestamp')
|
208 |
+
pm4py.write_xes(event_log, 'C:/Users/demo/Desktop/running-example-exported.xes')
|
209 |
+
|
210 |
+
Storing an Event Log object as a .csv file
|
211 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
212 |
+
|
213 |
+
In some cases, we might want to store an event log object, e.g., obtained by importing a .xes file, as a csv file. For example, certain (commercial) process mining tools only support csv importing. For this purpose, pm4py offers conversion functionality that allows you to convert your event log object into a data frame, which you can subsequently export using pandas.
|
214 |
+
|
215 |
+
.. code-block:: python3
|
216 |
+
|
217 |
+
import pm4py
|
218 |
+
|
219 |
+
if __name__ == "__main__":
|
220 |
+
event_log = pm4py.read_xes('C:/Users/demo/Downloads/running-example.xes')
|
221 |
+
df = pm4py.convert_to_dataframe(event_log)
|
222 |
+
df.to_csv('C:/Users/demo/Desktop/running-example-exported.csv')
|
223 |
+
|
224 |
+
Storing an Event Log object as a .xes file
|
225 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
226 |
+
|
227 |
+
Storing an event log object as a .xes file is rather straightforward. In pm4py, the **write_xes()** method allows us to do so. Consider the simple example script below in which we show an example of this functionality.
|
228 |
+
|
229 |
+
.. code-block:: python3
|
230 |
+
|
231 |
+
import pm4py
|
232 |
+
|
233 |
+
if __name__ == "__main__":
|
234 |
+
        event_log = pm4py.read_xes('C:/Users/demo/Downloads/running-example.xes')
|
235 |
+
pm4py.write_xes(event_log, 'C:/Users/demo/Desktop/running-example-exported.xes')
|
236 |
+
|
237 |
+
Pre-Built Event Log Filters
|
238 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
239 |
+
|
240 |
+
.. raw:: html
|
241 |
+
|
242 |
+
<!--<iframe width="560" height="315" src="https://www.youtube.com/embed/alkZkhK2mAo" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>-->
|
243 |
+
<a href="https://www.youtube.com/embed/alkZkhK2mAo" target="_blank" rel="noopener noreferrer">→ Watch on YouTube: pm4py tutorials - tutorial #5: Playing with Event Data; Shipped Filters</a>
|
244 |
+
|
245 |
+
There are various pre-built filters in PM4Py, which make commonly needed process mining filtering functionality a lot easier. In the upcoming list, we briefly give an overview of these functions. We describe how to call them, their main input parameters and their return objects.
|
246 |
+
|
247 |
+
* **filter_start_activities(log, activities, retain=True)**; This function filters the given event log object (either a data frame or a PM4Py event log object) based on a given set of input activity names that need to occur at the starting point of a trace. If we set retain to False, we remove all traces that contain any of the specified activities as their first event.
|
248 |
+
* **filter_end_activities(log, activities, retain=True)**; Similar functionality to the start activity filter. However, in this case, the filter is applied for the activities that occur at the end of a trace.
|
249 |
+
* **filter_event_attribute_values(log, attribute_key, values, level="case", retain=True)**; Filters an event log (either data frame or PM4Py EventLog object) on event attributes. The attribute_key is a string representing the attribute key to filter, the values parameter allows you to specify a set of allowed values. If the level parameter is set to 'case', then any trace that contains at least one event that matches the attribute-value combination is retained. If the level parameter value is set to 'event', only the events are retained that describe the specified value. Setting retain to False inverts the filter.
|
250 |
+
* **filter_trace_attribute_values(log, attribute_key, values, retain=True)**; Keeps (or removes if retain is set to False) only the traces that have an attribute value for the provided attribute_key and listed in the collection of corresponding values.
|
251 |
+
* **filter_variants(log, variants, retain=True)**; Keeps those traces that correspond to a specific activity execution sequence, i.e., known as a variant. For example, in a large log, we want to retain all traces that describe the execution sequence 'a', 'b', 'c'. The variants parameter is a collection of lists of activity names.
|
252 |
+
* **filter_directly_follows_relation(log, relations, retain=True)**; This function filters all traces that contain a specified 'directly follows relation'. Such a relation is simply a pair of activities, e.g., ('a','b') s.t., 'a' is directly followed by 'b' in a trace. For example, the trace <'a','b','c','d'> contains directly follows pairs ('a','b'), ('b','c') and ('c','d'). The relations parameter is a set of tuples, containing activity names. The retain parameter allows us to express whether or not we want to keep or remove the matching traces.
|
253 |
+
* **filter_eventually_follows_relation(log, relations, retain=True)** This function allows us to match traces on a generalization of the directly follows relation, i.e., an arbitrary number of activities is allowed to occur in-between the input relations. For example, when we call the function with a relation ('a','b'), any trace in which we observe activity 'a' at some point, to be followed later by activity 'b', again at some point, adheres to this filter. For example, a trace <'a','b','c','d'> contains eventually follows pairs ('a','b'), ('a','c') ('a','d'), ('b','c'), ('b','d') and ('c','d'). Again, the relations parameter is a set of tuples, containing activity names and the retain parameter allows us to express whether or not we want to keep or remove the matching traces.
|
254 |
+
* **filter_time_range(log, dt1, dt2, mode='events')**; Filters the event log based on a given time range, defined by timestamps dt1 and dt2. The timestamps should be of the form datetime.datetime. The filter has three modes (default: 'events'):
|
255 |
+
|
256 |
+
* *'events'*; Retains all events that fall in the provided time range. Removes any empty trace in the filtered event log.
|
257 |
+
* *'traces_contained'*; Retains any trace that is completely 'contained' within the given time frame. For example, this filter is useful if one is interested to retain all full traces in a specific day/month/year.
|
258 |
+
* *'traces_intersecting'*; Retains any trace that has at least one event that falls into the given time range.
|
259 |
+
|
260 |
+
Consider the example code below, in which we provide various example applications of the mentioned filtering functions, using the running example event log. Try to copy-paste each line in your own environment and play around with the resulting filtered event log to get a good idea of the functionality of each filter. Note that, all functions shown below also work when providing a dataframe as an input!
|
261 |
+
|
262 |
+
.. code-block:: python3
|
263 |
+
|
264 |
+
import pm4py
|
265 |
+
import datetime as dt
|
266 |
+
|
267 |
+
if __name__ == "__main__":
|
268 |
+
log = pm4py.read_xes('C:/Users/demo/Downloads/running-example.xes')
|
269 |
+
|
270 |
+
filtered = pm4py.filter_start_activities(log, {'register request'})
|
271 |
+
|
272 |
+
filtered = pm4py.filter_start_activities(log, {'register request TYPO!'})
|
273 |
+
|
274 |
+
filtered = pm4py.filter_end_activities(log, {'pay compensation'})
|
275 |
+
|
276 |
+
filtered = pm4py.filter_event_attribute_values(log, 'org:resource', {'Pete', 'Mike'})
|
277 |
+
|
278 |
+
filtered = pm4py.filter_event_attribute_values(log, 'org:resource', {'Pete', 'Mike'}, level='event')
|
279 |
+
|
280 |
+
filtered = pm4py.filter_trace_attribute_values(log, 'concept:name', {'3', '4'})
|
281 |
+
|
282 |
+
filtered = pm4py.filter_trace_attribute_values(log, 'concept:name', {'3', '4'}, retain=False)
|
283 |
+
|
284 |
+
filtered = pm4py.filter_variants(log, [
|
285 |
+
['register request', 'check ticket', 'examine casually', 'decide', 'pay compensation']])
|
286 |
+
|
287 |
+
filtered = pm4py.filter_variants(log, [
|
288 |
+
['register request', 'check ticket', 'examine casually', 'decide', 'reject request']])
|
289 |
+
|
290 |
+
filtered = pm4py.filter_directly_follows_relation(log, [('check ticket', 'examine casually')])
|
291 |
+
|
292 |
+
filtered = pm4py.filter_eventually_follows_relation(log, [('examine casually', 'reject request')])
|
293 |
+
|
294 |
+
filtered = pm4py.filter_time_range(log, dt.datetime(2010, 12, 30), dt.datetime(2010, 12, 31), mode='events')
|
295 |
+
|
296 |
+
filtered = pm4py.filter_time_range(log, dt.datetime(2010, 12, 30), dt.datetime(2010, 12, 31),
|
297 |
+
mode='traces_contained')
|
298 |
+
|
299 |
+
filtered = pm4py.filter_time_range(log, dt.datetime(2010, 12, 30), dt.datetime(2010, 12, 31),
|
300 |
+
mode='traces_intersecting')
|
301 |
+
|
302 |
+
|
303 |
+
Discovering Your First Process Model
|
304 |
+
------------------------------------
|
305 |
+
|
306 |
+
Since we have studied basic conceptual knowledge of process mining and event data munging and crunching, we focus on process discovery. As indicated, the goal is to discover, i.e., primarily completely automated and algorithmically, a process model that accurately describes the process, i.e., as observed in the event data. For example, given the running example event data, we aim to discover the process model that we have used to explain the running example's process behavior, i.e., Figure 3. This section briefly explains what modeling formalisms exist in PM4Py while applying different process discovery algorithms. Secondly, we give an overview of the implemented process discovery algorithms, their output type(s), and how we can invoke them. Finally, we discuss the challenges of applying process discovery in practice.
|
307 |
+
|
308 |
+
.. raw:: html
|
309 |
+
|
310 |
+
<!--<iframe width="560" height="315" src="https://www.youtube.com/embed/BJMp763Ye_o" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>-->
|
311 |
+
<a href="https://www.youtube.com/embed/BJMp763Ye_o" target="_blank" rel="noopener noreferrer">→ Watch on YouTube: pm4py tutorials - tutorial #7 process discovery</a>
|
312 |
+
|
313 |
+
Obtaining a Process Model
|
314 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
315 |
+
|
316 |
+
There are three different process modeling notations that are currently supported in PM4Py. These notations are: BPMN, i.e., models such as the ones shown earlier in this tutorial, Process Trees and Petri nets. A Petri net is a more mathematical modeling representation compared to BPMN. Often the behavior of a Petri net is more difficult to comprehend compared to BPMN models. However, due to their mathematical nature, Petri nets are typically less ambiguous (i.e., confusion about their described behavior is not possible). Process Trees represent a strict subset of Petri nets and describe process behavior in a hierarchical manner. In this tutorial, we will focus primarily on BPMN models and process trees. For more information about Petri nets and their application to (business) process modeling (from a ‘workflow’ perspective), we refer to
|
317 |
+
`this article <https://www.researchgate.net/profile/Wil_Aalst/publication/220337578_The_Application_of_Petri_Nets_to_Workflow_Management/links/0deec517a563a45197000000/The-Application-of-Petri-Nets-to-Workflow-Management.pdf?_sg%5B0%5D=2TrqDbNsoZEr67XgOwI_9qxtlO_S1HJFHn8edW7aE0fMWzmsY0D1GhrsbRXdtZhTLvQ1KcSm9pkLzooDMl-eRg.DhnNamQg4EvK8MAwucwkB1VDke7eNq0E4jxMAa2IMXXZtvr9k1PPiwZpQEt1Z2iqkdkN-SOlWyjFloP-BivLow&_sg%5B1%5D=XeHToX2_7feAtM6yO395-HEYttSzdWJeiLaGlD_7Dn3hRXYnVXya0-dHm5RWmjX22gF3ton7d7FSzF6FjL_NYZCQzRvJuPg4zPWnk_HCe0xj.DhnNamQg4EvK8MAwucwkB1VDke7eNq0E4jxMAa2IMXXZtvr9k1PPiwZpQEt1Z2iqkdkN-SOlWyjFloP-BivLow&_iepl=>`_.
|
318 |
+
|
319 |
+
Interestingly, none of the algorithms implemented in PM4Py directly discovers a BPMN model. However, any process tree can easily be translated to a BPMN model. Since we have already discussed the basic operators of BPMN models, we will start with the discovery of a process tree, which we convert to a BPMN model. Later, we will study the ‘underlying’ process tree. The algorithm that we are going to use is the ‘Inductive Miner’; More details about the (inner workings of the) algorithm can be found in
|
320 |
+
`this presentation <http://www.processmining.org/_media/presentations/2013/petri_nets.pptx>`_ and in `this article <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.396.197&rep=rep1&type=pdf>`_. Consider the following code snippet. We discover a BPMN model (using a conversion from process tree to BPMN) using the inductive miner, based on the running example event data set.
|
321 |
+
|
322 |
+
.. code-block:: python3
|
323 |
+
|
324 |
+
import pm4py
|
325 |
+
|
326 |
+
if __name__ == "__main__":
|
327 |
+
log = pm4py.read_xes('C:/Users/demo/Downloads/running-example.xes')
|
328 |
+
|
329 |
+
process_tree = pm4py.discover_process_tree_inductive(log)
|
330 |
+
bpmn_model = pm4py.convert_to_bpmn(process_tree)
|
331 |
+
pm4py.view_bpmn(bpmn_model)
|
332 |
+
|
333 |
+
|
334 |
+
Note that the resulting process model is the following image:
|
335 |
+
|
336 |
+
.. image:: https://pm4py.fit.fraunhofer.de/static/assets/images/getting_started/bpmn_inductive_running_example.png
|
337 |
+
|
338 |
+
*Figure 6: BPMN model discovered based on the running example event data set, using the Inductive Miner implementation of PM4Py.*
|
339 |
+
|
340 |
+
Observe that the process model that we discovered, is indeed the same model as the model that we have used before, i.e., as shown in Figure 3.
|
341 |
+
|
342 |
+
As indicated, the algorithm used in this example actually discovers a Process Tree. Such a process tree is, mathematically speaking, a
|
343 |
+
`rooted tree <https://en.wikipedia.org/wiki/Tree_(graph_theory)>`_ annotated with ‘control-flow’ information. We’ll first use the following code snippet to discover a process tree based on the running example, and, afterwards shortly analyze the model.
|
344 |
+
|
345 |
+
.. code-block:: python3
|
346 |
+
|
347 |
+
import pm4py
|
348 |
+
|
349 |
+
if __name__ == "__main__":
|
350 |
+
log = pm4py.read_xes('C:/Users/demo/Downloads/running-example.xes')
|
351 |
+
|
352 |
+
process_tree = pm4py.discover_process_tree_inductive(log)
|
353 |
+
pm4py.view_process_tree(process_tree)
|
354 |
+
|
355 |
+
|
356 |
+
.. image:: https://pm4py.fit.fraunhofer.de/static/assets/images/getting_started/process_tree_running_example.png
|
357 |
+
|
358 |
+
*Figure 7: Process Tree model discovered based on the running example event data set, using the Inductive Miner implementation of PM4Py.*
|
359 |
+
|
360 |
+
We read the process tree model from top to bottom. The first circle, i.e., the ‘root’ of the process tree, describes a ‘->’ symbol. This means that, when scrolling further down, the process described by the model executes the ‘children’ of the root from left to right. Hence, first “register request” is executed, followed by the circle node with the ‘*’ symbol, finally to be followed by the node with the ‘X’ symbol. The node with the ‘*’ represents ‘repeated behavior’, i.e., the possibility to repeat the behavior. When scrolling further down, the left-most ‘subtree’ of the ‘*’-operator is always executed, the right-most child (in this case, “reinitiate request”) triggers a repeated execution of the left-most child. Observe that this is in line with the process models we have seen before, i.e., the “reinitiate request” activity allows us to repeat the behavior regarding examinations and checking the ticket. When we go further down below in the subtree of the ‘*’-operator, we again observe a ‘->’ node. Hence, its left-most child is executed first, followed by its right-most child (“decide”). The left-most child of the ‘->’ node has a ‘+’ symbol. This represents concurrent behavior; hence, its children can be executed simultaneously or in any order. Its left-most child is the “check ticket” activity. Its right-most child is a node with an ‘X’ symbol (just like the right-most child of the tree's root). This represents an exclusive choice, i.e., one of the children is executed (either “examine casually” or “examine thoroughly”). Observe that the process tree describes the exact same behavior as the BPMN models shown before.
|
361 |
+
|
362 |
+
Obtaining a Process Map
|
363 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
364 |
+
|
365 |
+
Many `commercial process mining solutions <https://www.gartner.com/reviews/market/process-mining>`_ do not provide extended support for discovering process models. Often, as a main visualization of processes, process maps are used. A process map contains activities and connections (by means of arcs) between them. A connection between two activities usually means that there is some form of precedence relation. In its simplest form, it means that the ‘source’ activity directly precedes the ‘target’ activity. Let’s quickly take a look at a concrete example! Consider the following code snippet, in which we learn a ‘Directly Follows Graph’ (DFG)-based process map:
|
366 |
+
|
367 |
+
.. code-block:: python3
|
368 |
+
|
369 |
+
import pm4py
|
370 |
+
|
371 |
+
if __name__ == "__main__":
|
372 |
+
log = pm4py.read_xes('C:/Users/demo/Downloads/running-example.xes')
|
373 |
+
|
374 |
+
dfg, start_activities, end_activities = pm4py.discover_dfg(log)
|
375 |
+
pm4py.view_dfg(dfg, start_activities, end_activities)
|
376 |
+
|
377 |
+
|
378 |
+
|
379 |
+
.. image:: https://pm4py.fit.fraunhofer.de/static/assets/images/getting_started/dfg_running_example.png
|
380 |
+
|
381 |
+
*Figure 8: Process Map (DFG-based) discovered based on the running example event data set.*
|
382 |
+
|
383 |
+
The **pm4py.discover_dfg(log)** function returns a triple. The first result, i.e., called dfg in this example, is a dictionary mapping pairs of activities that follow each other directly, to the number of corresponding observations. The second and third arguments are the start and end activities observed in the event log (again counters). In the visualization, the green circle represents the start of any observed process instance. The orange circle represents the end of an observed process instance. In 6 cases, the register request is the first activity observed (represented by the arc labeled with value 6). In the event log, the check ticket activity is executed directly after the register request activity. The examine thoroughly activity is following registration once, examine casually follows 3 times. Note that, indeed, in total, the register activity is followed by 6 different events, i.e., there are 6 traces in the running example event log. However, note that there are typically much more relations observable compared to the number of cases in an event log. Even using this simple event data, the DFG-based process map of the process is much more complex than the process models learned earlier. Furthermore, it is much more difficult to infer the actual execution of the process based on the process map. Hence, when using process maps, one should be very careful when trying to comprehend the actual process.
|
384 |
+
|
385 |
+
In PM4Py, we also implemented the `Heuristics Miner <https://ieeexplore.ieee.org/iel5/5937059/5949295/05949453.pdf>`_, a more advanced process map discovery algorithm, compared to its DFG-based alternative. We won’t go into the algorithmic details here, however, in a HM-based process map, the arcs between activities represent observed concurrency. For example, the algorithm is able to detect that the ticket check and examination are concurrent. Hence, these activities will not be connected in the process map. As such, a HM-based process map is typically simpler compared to a DFG-based process map.
|
386 |
+
|
387 |
+
.. code-block:: python3
|
388 |
+
|
389 |
+
import pm4py
|
390 |
+
|
391 |
+
if __name__ == "__main__":
|
392 |
+
log = pm4py.read_xes('C:/Users/demo/Downloads/running-example.xes')
|
393 |
+
|
394 |
+
map = pm4py.discover_heuristics_net(log)
|
395 |
+
pm4py.view_heuristics_net(map)
|
396 |
+
|
397 |
+
|
398 |
+
.. image:: https://pm4py.fit.fraunhofer.de/static/assets/images/getting_started/hnet_running_example.png
|
399 |
+
|
400 |
+
*Figure 9: Process Map (HM-based) discovered based on the running example event data set.*
|
401 |
+
|
402 |
+
|
403 |
+
Conformance Checking
|
404 |
+
------------------------------------
|
405 |
+
|
406 |
+
.. raw:: html
|
407 |
+
|
408 |
+
<!--<iframe width="560" height="315" src="https://www.youtube.com/embed/0YNvijqX3FY" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>-->
|
409 |
+
<a href="https://www.youtube.com/embed/0YNvijqX3FY" target="_blank" rel="noopener noreferrer">→ Watch on YouTube: pm4py tutorials - tutorial #8 conformance checking</a>
|
410 |
+
|
pm4py/docs/source/index.rst
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Welcome to pm4py's Documentation!
|
2 |
+
===================================
|
3 |
+
|
4 |
+
``pm4py`` is a Python library implementing a variety of `process mining <https://en.wikipedia.org/wiki/Process_mining>`_ algorithms.
|
5 |
+
|
6 |
+
A simple example of ``pm4py`` in action:
|
7 |
+
|
8 |
+
.. code-block:: python
|
9 |
+
|
10 |
+
import pm4py
|
11 |
+
|
12 |
+
if __name__ == "__main__":
|
13 |
+
log = pm4py.read_xes('<path-to-xes-log-file.xes>')
|
14 |
+
process_model = pm4py.discover_bpmn_inductive(log)
|
15 |
+
pm4py.view_bpmn(process_model)
|
16 |
+
|
17 |
+
In this documentation, you can find all relevant information to set up ``pm4py`` and start your process mining journey.
|
18 |
+
Please consult the Contents listed below to navigate the documentation.
|
19 |
+
|
20 |
+
Happy #processmining!
|
21 |
+
|
22 |
+
|
23 |
+
Contents
|
24 |
+
--------
|
25 |
+
|
26 |
+
.. toctree::
|
27 |
+
:maxdepth: 2
|
28 |
+
|
29 |
+
install
|
30 |
+
getting_started
|
31 |
+
api
|
32 |
+
release_notes
|
pm4py/docs/source/install.rst
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Installation
|
2 |
+
============
|
3 |
+
|
4 |
+
pip
|
5 |
+
---
|
6 |
+
|
7 |
+
To use ``pm4py`` on any OS, install it using ``pip``:
|
8 |
+
|
9 |
+
.. code-block:: console
|
10 |
+
|
11 |
+
(.venv) $ pip install pm4py
|
12 |
+
|
13 |
+
``pm4py`` uses the ``Graphviz`` library for rendering visualizations.
|
14 |
+
Please install `Graphviz <https://graphviz.org/download/>`_.
|
15 |
+
|
16 |
+
After installation, GraphViz is located in the ``program files`` directory.
|
17 |
+
The ``bin\`` folder of the GraphViz directory needs to be added manually to the ``system path``.
|
18 |
+
In order to do so, please follow `this instruction <https://stackoverflow.com/questions/44272416/how-to-add-a-folder-to-path-environment-variable-in-windows-10-with-screensho>`_.
|
19 |
+
|
20 |
+
Docker
|
21 |
+
------
|
22 |
+
To install pm4py via Docker, use:
|
23 |
+
|
24 |
+
.. code-block:: console
|
25 |
+
|
26 |
+
$ docker pull pm4py/pm4py-core:latest
|
27 |
+
|
28 |
+
To run pm4py via docker, use:
|
29 |
+
|
30 |
+
.. code-block:: console
|
31 |
+
|
32 |
+
$ docker run -it pm4py/pm4py-core:latest bash
|
pm4py/docs/source/modules.rst
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pm4py-source
|
2 |
+
============
|
3 |
+
|
4 |
+
.. toctree::
|
5 |
+
:maxdepth: 4
|
6 |
+
|
7 |
+
pm4py
|
pm4py/docs/source/pm4py-logo.png
ADDED
![]() |
pm4py/docs/source/release_notes.rst
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Release Notes
|
2 |
+
=============
|
3 |
+
|
4 |
+
|
5 |
+
pm4py 2.7.0 - Release Notes
|
6 |
+
---------------------------
|
7 |
+
|
8 |
+
The major changes in pm4py 2.7.0 are as follows:
|
9 |
+
|
10 |
+
1. We added an initial integration to ChatGPT
|
11 |
+
|
12 |
+
2. We added some connectors for workstation-supported processes (Outlook mail and calendar; web browsers).
|
13 |
+
|
14 |
+
|
15 |
+
pm4py 2.6.0 - Release Notes
|
16 |
+
---------------------------
|
17 |
+
|
18 |
+
The major changes in pm4py 2.6.0 are as follows:
|
19 |
+
|
20 |
+
1. We added the ILP Miner as process discovery algorithm
|
21 |
+
|
22 |
+
2. We added two log filters: "timestamp grouping" and "consecutive activities"
|
23 |
+
|
24 |
+
3. We added the insertion of the case arrival/finish rate and of the waiting/service/sojourn times
|
25 |
+
in the simplified interface
|
26 |
+
|
27 |
+
4. We added a baseline clustering algorithm, based on the pre-existing feature extraction
|
28 |
+
|
29 |
+
5. We added the extraction of the "target vector" from event logs for machine learning purposes
|
30 |
+
|
31 |
+
|
32 |
+
pm4py 2.5.0 - Release Notes
|
33 |
+
---------------------------
|
34 |
+
|
35 |
+
The major changes in pm4py 2.5.0 are as follows:
|
36 |
+
|
37 |
+
1. We added the Cardoso and extended Cardoso simplicity metrics to pm4py
|
38 |
+
|
39 |
+
2. We added discovery of Stochastic Arc Weight nets based on OCEL logs.
|
40 |
+
|
41 |
+
3. We added Murata-based Petri net simplification to the simplified interface (implicit place removal)
|
42 |
+
|
43 |
+
|
44 |
+
pm4py 2.4.0 - Release Notes
|
45 |
+
---------------------------
|
46 |
+
|
47 |
+
Today, we released pm4py 2.4.0.
|
48 |
+
We have adapted our release policy slightly, i.e., as of now, the pm4py versioning follows the MAJOR.MINOR.FIX pattern.
|
49 |
+
We will also report all MAJOR and MINOR releases in the release notes.
|
50 |
+
|
51 |
+
As today's release is a minor release, we report on the main changes here.
|
52 |
+
|
53 |
+
1. We added the Murata algorithm (Berthelot implementation) to remove the structurally redundant places, which is now available in the simplified interface.
|
54 |
+
|
55 |
+
2. We added the reduction of invisible transitions in Petri nets to the simplified interface.
|
56 |
+
|
57 |
+
3. We added support for calculating stochastic languages of process models
|
58 |
+
|
59 |
+
4. We added support for calculating EMD between two stochastic languages
|
60 |
+
|
61 |
+
5. We added a visualization of alignments in the simplified interface
|
62 |
+
|
63 |
+
6. We added visualization of footprint table in simplified interface
|
64 |
+
|
65 |
+
7. We added a conversion of Petri net objects to networkX DiGraphs
|
66 |
+
|
67 |
+
8. We added support for stochastic Petri nets
|
68 |
+
|
69 |
+
9. We added support for stochastic arc-weight nets (the paper describing this class of nets is submitted to the Petri nets 2023 conference)
|
70 |
+
|
71 |
+
pm4py 2.3.0 - Release Notes
|
72 |
+
---------------------------
|
73 |
+
Finally, pm4py 2.3.0 has arrived!
|
74 |
+
The 2.3.0 release contains various significant updates and improvements concerning its predecessors.
|
75 |
+
The release consists of approximately 550 commits and 47.000 LoC!
|
76 |
+
The main changes are as follows:
|
77 |
+
|
78 |
+
1. *Flexible parameter passing in the simplified method invocation*, e.g., :meth:`pm4py.discovery.discover_petri_net_inductive`;
|
79 |
+
For example, in ```pm4py``` 2.2.X, the columns used in process discovery were fixed (i.e., case:concept:name, concept:name, time:timestamp). Hence, changing the perspective implied changing column headers.
|
80 |
+
In pm4py 2.3.X, the columns used in process discovery are now part of the function arguments.
|
81 |
+
A simple comparison:
|
82 |
+
* Discovering a Petri net in pm4py 2.2.X:
|
83 |
+
``pm4py.discover_petri_net_inductive(dataframe, noise_threshold=0.2)``
|
84 |
+
|
85 |
+
* Discovering a Petri net in pm4py 2.3.X:
|
86 |
+
``pm4py.discover_petri_net_inductive(dataframe, noise_threshold=0.2, activity_key="activity", timestamp_key="timestamp", case_id_key="case")``
|
87 |
+
|
88 |
+
2. *Dataframes are primary citizens*;
|
89 |
+
pm4py used to support both Pandas ``Dataframes`` and our custom-defined event log object. We have decided to adapt all algorithms to work on Dataframes. As such, event data is expected to be represented as a Dataframe in pm4py (i.e., we are dropping the explicit use of our custom event log object). There are two main reasons for this design decision:
|
90 |
+
1. *Performance*; Generally, Pandas Dataframes are performing significantly better on most operations compared to our custom event log object
|
91 |
+
2. *Practice*; Most real event data is of tabular form.
|
92 |
+
|
93 |
+
Of course, pm4py still supports importing .xes files. However, when importing an event log using :meth:`pm4py.read.read_xes`, the object is directly converted into a Dataframe.
|
94 |
+
A general drawback of this design decision is that pm4py no longer appropriately supports nested objects (generally supported by the .xes standard). However, as indicated in point b), such nested objects are rarely used in practice.
|
95 |
+
|
96 |
+
3. *Typing Information in the simplified interface*;
|
97 |
+
All methods in the simplified interface are guaranteed to have typing information on their input and output objects.
|
98 |
+
|
99 |
+
4. *Variant Representation*;
|
100 |
+
In pm4py 2.3.X, trace variants are represented as a tuple of Strings (representing activity names) instead of a String where a ‘,’ symbol indicates activity separation. For example, a variant of the form <A,B,C> is now represented as a tuple (‘A’,’B’,’C’) and was previously represented as ‘A,B,C’. This fix allows activity names to contain a ‘,’ symbol.
|
101 |
+
|
102 |
+
5. *Inductive Miner Revised*;
|
103 |
+
We have re-implemented and restructured the code of the inductive miner. The new version is closer to the reference implementation in ProM and is more performant than the previous version.
|
104 |
+
|
105 |
+
6. *Business Hours Revised*;
|
106 |
+
The business hours functionality in pm4py has been revised completely. In pm4py 2.2.X, one could only specify the working days and hours, which were fixed. In pm4py 2.3.X, one can define week-day-based activity slots (e.g., to model breaks). One slot, i.e., one tuple consists of one start and one end time given in seconds since week start, e.g. [(7 * 60 * 60, 17 * 60 * 60), ((24 + 7) * 60 * 60, (24 + 12) * 60 * 60), ((24 + 13) * 60 * 60, (24 + 17) * 60 * 60),] meaning that business hours are Mondays 07:00 - 17:00 and Tuesdays 07:00 - 12:00 and 13:00 - 17:00
|
107 |
+
|
108 |
+
7. *Auto-Generated Docs*;
|
109 |
+
As you may have noticed, this website serves as the new documentation hub for pm4py. It contains all previously available information on the project website related to ‘installation’ and ‘getting started’. For the simplified interface, we have merged the general documentation with the API docs to improve the overall understanding of working with pm4py. The docs are now generated directly from the pm4py source. Hence, feel free to share a pull request if you find any issues.
|
110 |
+
|
111 |
+
|
112 |
+
Happy #processmining!
|
113 |
+
|
114 |
+
The #pm4py development team.
|
pm4py/examples/CHECK_MISSING.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os


if __name__ == "__main__":
    # Collect the module names (file name without extension) of every example
    # script in the current directory, excluding the global runner and this
    # checker itself.
    script_names = [
        fname.split(".")[0]
        for fname in os.listdir(".")
        if fname.endswith(".py")
        and "execute_everything" not in fname
        and "CHECK_MISSING" not in fname
    ]
    # Context manager guarantees the file handle is closed (the original used
    # open(...).read() and leaked the handle).
    with open("execute_everything.py", "r") as runner_file:
        runner_contents = runner_file.read()
    # Report every example script that execute_everything.py does not mention.
    for name in script_names:
        if name not in runner_contents:
            print(name)
|
pm4py/examples/activities_to_alphabet.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pm4py
from pm4py.objects.log.util import activities_to_alphabet
from pm4py.util import constants


def execute_script():
    """Read the running-example log and print it with activities renamed to letters."""
    log_df = pm4py.read_xes(
        "../tests/input_data/running-example.xes", return_legacy_log_object=False
    )
    # Map each distinct value of the activity column to an alphabet character.
    params = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"}
    renamed = activities_to_alphabet.apply(log_df, parameters=params)
    print(renamed)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/activity_position.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pm4py
from pm4py.util import constants, pandas_utils
from pm4py.objects.log.util import dataframe_utils
import os


def execute_script():
    """Print the positional summary of two activities of the receipt log."""
    csv_path = os.path.join("..", "tests", "input_data", "receipt.csv")
    receipt_df = pandas_utils.read_csv(csv_path)
    # Parse the timestamp column so the events are properly ordered.
    receipt_df = dataframe_utils.convert_timestamp_columns_in_df(
        receipt_df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["time:timestamp"],
    )
    # Summary of the positions (index within the case) of two activities.
    for activity in ("Confirmation of receipt", "T02 Check confirmation of receipt"):
        print(pm4py.get_activity_position_summary(receipt_df, activity))


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/align_approx_pt.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import importlib.util
from pm4py.algo.discovery.inductive import algorithm as inductive
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.conformance.alignments.process_tree import algorithm as align_approx
from pm4py.objects.petri_net.utils.align_utils import pretty_print_alignments
from examples import examples_conf


def execute_script():
    """Discover a process tree and compute approximated alignments against it."""
    log_path = os.path.join("..", "tests", "input_data", "running-example.xes")
    event_log = xes_importer.apply(log_path)
    tree = inductive.apply(event_log)

    # Only render the tree when graphviz is installed.
    if importlib.util.find_spec("graphviz"):
        from pm4py.visualization.process_tree import visualizer as pt_vis

        viz_params = {
            pt_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: examples_conf.TARGET_IMG_FORMAT
        }
        pt_vis.view(pt_vis.apply(tree, parameters=viz_params))

    print("start calculate approximated alignments")
    approx_alignments = align_approx.apply(event_log, tree)
    pretty_print_alignments(approx_alignments)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/align_decomposition_ex_paper.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.objects.petri_net.utils.petri_utils import add_arc_from_to
from pm4py.objects.petri_net.utils import decomposition
from examples import examples_conf
import importlib.util


def execute_script():
    """Build the example net from the decomposition paper, decompose it, and view the parts."""
    net = PetriNet("")

    # Places: source, sink and the internal conditions c1..c9.
    place_names = ["start", "end"] + ["c%d" % i for i in range(1, 10)]
    places = {name: PetriNet.Place(name) for name in place_names}
    for place in places.values():
        net.places.add(place)

    # Transitions t1..t11; indices absent from the label map are invisible (None label).
    labels = {1: "a", 3: "b", 4: "c", 5: "d", 6: "e", 8: "f", 9: "g", 10: "h"}
    transitions = {
        "t%d" % i: PetriNet.Transition("t%d" % i, labels.get(i))
        for i in range(1, 12)
    }
    for transition in transitions.values():
        net.transitions.add(transition)

    # Arc list (source name, target name), identical to the original construction.
    nodes = dict(places)
    nodes.update(transitions)
    arcs = [
        ("start", "t1"), ("t1", "c1"), ("t1", "c2"),
        ("c1", "t2"), ("c1", "t3"), ("c2", "t4"),
        ("t2", "c3"), ("t3", "c3"), ("t4", "c4"),
        ("c3", "t5"), ("c4", "t5"), ("t5", "c5"),
        ("c5", "t6"), ("t6", "c1"), ("t6", "c2"),
        ("c5", "t7"), ("t7", "c7"), ("t7", "c6"),
        ("c7", "t8"), ("c6", "t9"), ("t8", "c8"),
        ("t9", "c9"), ("c8", "t11"), ("c9", "t11"),
        ("t11", "end"), ("c5", "t10"), ("t10", "end"),
    ]
    for source, target in arcs:
        add_arc_from_to(nodes[source], nodes[target], net)

    im = Marking()
    im[places["start"]] = 1
    fm = Marking()
    fm[places["end"]] = 1

    decomposed_net = decomposition.decompose(net, im, fm)
    gvizs = []

    if importlib.util.find_spec("graphviz"):
        from pm4py.visualization.petri_net import visualizer

        # Show the full net first, then every component of the decomposition.
        gvizs.append(visualizer.apply(net, im, final_marking=fm,
                                      parameters={"format": examples_conf.TARGET_IMG_FORMAT}))
        visualizer.view(gvizs[-1])
        for snet, sim, sfm in decomposed_net:
            gvizs.append(visualizer.apply(snet, sim, final_marking=sfm,
                                          parameters={"format": examples_conf.TARGET_IMG_FORMAT}))
            visualizer.view(gvizs[-1])


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/align_decomposition_example.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.algo.conformance.alignments.decomposed import algorithm as dec_align
from pm4py.algo.evaluation.replay_fitness import algorithm as rep_fit
from pm4py.objects.conversion.process_tree import converter as process_tree_converter
import os
import time


def execute_script():
    """Decomposed alignments on the a32f0n00 log, then alignment-based fitness."""
    log = xes_importer.apply(
        os.path.join("..", "tests", "compressed_input_data", "09_a32f0n00.xes.gz")
    )
    # Discover a model with the inductive miner and convert it to a Petri net.
    tree = inductive_miner.apply(log)
    net, im, fm = process_tree_converter.apply(tree)

    # Alignments via decomposition, capped at 5 border disagreements.
    params = {
        dec_align.Variants.RECOMPOS_MAXIMAL.value.Parameters.PARAM_THRESHOLD_BORDER_AGREEMENT: 5
    }
    started = time.time()
    aligned_traces = dec_align.apply(log, net, im, fm, parameters=params)
    print(time.time() - started)

    # Fitness over the recomposed alignments (classical alignment-based evaluation).
    fitness = rep_fit.evaluate(aligned_traces, variant=rep_fit.Variants.ALIGNMENT_BASED)
    print(fitness)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/alignment_discounted_a_star.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import time
from pm4py.algo.conformance.alignments.petri_net import algorithm as ali
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.objects.petri_net.importer import importer as petri_importer


def _load_running_example():
    """Import the running-example log and Petri net shared by both tests."""
    log_path = os.path.join("..", "tests", "input_data", "running-example.xes")
    pnml_path = os.path.join("..", "tests", "input_data", "running-example.pnml")
    log = xes_importer.apply(log_path)
    net, marking, fmarking = petri_importer.apply(pnml_path)
    return log, net, marking, fmarking


def testSynchronousDiscountedAlignment():
    """
    Run an alignment based on the discounted edit distance,
    using the synchronous product.
    """
    log, net, marking, fmarking = _load_running_example()

    # to see the net :
    # vizu(net, marking, fmarking).view()

    start = time.time()
    # FIX: index the log through its public sequence interface (log[0])
    # instead of reaching into the private log._list attribute.
    alignment = ali.apply(log[0], net, marking, fmarking,
                          variant=ali.VERSION_DISCOUNTED_A_STAR,
                          parameters={ali.Parameters.SYNCHRONOUS: True,
                                      ali.Parameters.EXPONENT: 1.1})
    print(alignment)
    print("Time:", (time.time() - start))


def testNoSynchronousDiscountedAlignment():
    """
    Run an alignment based on the discounted edit distance,
    using the Petri net and petri_net.utils.align_utils.discountedEditDistance.
    """
    log, net, marking, fmarking = _load_running_example()

    start = time.time()
    alignment = ali.apply(log[0], net, marking, fmarking,
                          variant=ali.VERSION_DISCOUNTED_A_STAR,
                          parameters={ali.Parameters.SYNCHRONOUS: False,
                                      ali.Parameters.EXPONENT: 1.1})
    print(alignment)
    print("Time:", (time.time() - start))


if __name__ == '__main__':
    # example on the first trace
    testSynchronousDiscountedAlignment()
    testNoSynchronousDiscountedAlignment()
|
pm4py/examples/alignment_test.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

from pm4py import util
from pm4py.algo.conformance import alignments as ali
from pm4py.algo.conformance.alignments.petri_net.variants.state_equation_a_star import Parameters
from pm4py.objects import log as log_lib
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.objects.petri_net.importer import importer as petri_importer
from pm4py.objects.petri_net.utils.align_utils import pretty_print_alignments


def align(trace, net, im, fm, model_cost_function, sync_cost_function):
    """Align one trace against the net with custom cost functions (state-equation A*)."""
    params = {
        util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: log_lib.util.xes.DEFAULT_NAME_KEY,
        Parameters.PARAM_MODEL_COST_FUNCTION: model_cost_function,
        # Every move-on-log costs 1000, one entry per event of the trace.
        Parameters.PARAM_TRACE_COST_FUNCTION: [1000 for _ in trace],
        Parameters.PARAM_SYNC_COST_FUNCTION: sync_cost_function,
    }
    return ali.petri_net.algorithm.apply_trace(
        trace, net, im, fm, parameters=params,
        variant=ali.petri_net.algorithm.VERSION_STATE_EQUATION_A_STAR)


def execute_script():
    """Align every trace of the running example against its reference Petri net."""
    log_path = os.path.join("..", "tests", "input_data", "running-example.xes")
    pnml_path = os.path.join("..", "tests", "input_data", "running-example.pnml")

    # log_path = 'C:/Users/bas/Documents/tue/svn/private/logs/a32_logs/a32f0n05.xes'
    # pnml_path = 'C:/Users/bas/Documents/tue/svn/private/logs/a32_logs/a32.pnml'

    log = xes_importer.apply(log_path)
    net, marking, fmarking = petri_importer.apply(pnml_path)

    # Visible transitions: expensive model moves, free synchronous moves.
    # Invisible transitions: cheap model moves (never part of sync moves).
    model_cost_function = {}
    sync_cost_function = {}
    for transition in net.transitions:
        if transition.label is None:
            model_cost_function[transition] = 1
        else:
            model_cost_function[transition] = 1000
            sync_cost_function[transition] = 0

    alignments = [
        align(trace, net, marking, fmarking, model_cost_function, sync_cost_function)
        for trace in log
    ]
    pretty_print_alignments(alignments)


if __name__ == '__main__':
    execute_script()
|
pm4py/examples/all_optimal_alignments.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pm4py
from pm4py.algo.conformance.alignments.petri_net.variants import generator_dijkstra_less_memory
import os


def execute_script():
    """Enumerate all optimal alignments for each trace of the running example."""
    log = pm4py.read_xes(
        os.path.join("..", "tests", "input_data", "running-example.xes"),
        return_legacy_log_object=True,
    )
    net, im, fm = pm4py.discover_petri_net_inductive(log)

    for trace in log:
        print("\n\n")
        # The generator variant yields every optimal alignment, not just one.
        for alignment in generator_dijkstra_less_memory.apply(trace, net, im, fm):
            print(alignment)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/antialignments_and_precision.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

from pm4py.algo.conformance.antialignments.variants.discounted_a_star import apply as antii
from pm4py.algo.conformance.antialignments.algorithm import Parameters
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.objects.petri_net.importer import importer as petri_importer


if __name__ == '__main__':
    log_path = os.path.join("..", "tests", "input_data", "running-example.xes")
    pnml_path = os.path.join("..", "tests", "input_data", "running-example.pnml")
    log = xes_importer.apply(log_path)
    net, marking, fmarking = petri_importer.apply(pnml_path)

    # Discount exponent, marking-visit limit, and tolerance of the search.
    THETA = 1.5
    MU = 20
    EPSILON = 0.01
    resAnti = antii(
        log, net, marking, fmarking,
        parameters={
            Parameters.EXPONENT: THETA,
            Parameters.EPSILON: EPSILON,
            Parameters.MARKING_LIMIT: MU,
        },
    )
    print(resAnti['anti-alignment'])
    print("Precision:", resAnti['precision'])
|
23 |
+
|
pm4py/examples/backwards_token_replay.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.algo.conformance.tokenreplay import algorithm as tr
from pm4py.objects.conversion.process_tree import converter as process_tree_converter
import os


def execute_script():
    """Replay the running example on a discovered net using backwards token-based replay."""
    log = xes_importer.apply(os.path.join("..", "tests", "input_data", "running-example.xes"))
    tree = inductive_miner.apply(log)
    net, im, fm = process_tree_converter.apply(tree)
    # Backwards variant of token-based replay.
    replayed_traces = tr.apply(log, net, im, fm, variant=tr.Variants.BACKWARDS)
    print(replayed_traces)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/batch_detection.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pm4py
from pm4py.algo.discovery.batches import algorithm
import os


def execute_script():
    """Detect batching behavior in the receipt log and summarize it."""
    log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
    # Detect the batches from the event log.
    batches = algorithm.apply(log)
    # Complete batch information in a single row.
    print(batches)
    # Per activity-resource combination performed in batches, print a summary.
    for batch in batches:
        print(batch[0], batch[1])


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/bpmn_from_pt_conversion.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import pm4py
from pm4py.objects.conversion.process_tree import converter as pt_converter
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.objects.log.importer.xes import importer as xes_import
from pm4py.objects.bpmn.exporter import exporter as bpmn_exporter
from examples import examples_conf
import importlib.util


def execute_script():
    """Discover a process tree, convert it to BPMN, round-trip export it, and view it."""
    log_path = os.path.join(os.path.join("..", "tests", "input_data", "running-example.xes"))
    log = xes_import.apply(log_path)
    tree = inductive_miner.apply(log)
    bpmn = pt_converter.apply(tree, variant=pt_converter.Variants.TO_BPMN)

    if importlib.util.find_spec("graphviz"):
        # Export (and immediately remove) the model to exercise the exporter.
        bpmn_exporter.apply(bpmn, "stru.bpmn")
        os.remove("stru.bpmn")
        pm4py.view_bpmn(bpmn, format=examples_conf.TARGET_IMG_FORMAT)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/bpmn_import_and_to_petri_net.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import pm4py
from pm4py.objects.bpmn.importer import importer as bpmn_importer
from pm4py.objects.conversion.bpmn import converter as bpmn_converter


def execute_script():
    """Import a BPMN model, convert it to a Petri net, and evaluate it against the log."""
    log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "running-example.xes"))
    bpmn_graph = bpmn_importer.apply(os.path.join("..", "tests", "input_data", "running-example.bpmn"))
    net, im, fm = bpmn_converter.apply(bpmn_graph, variant=bpmn_converter.Variants.TO_PETRI_NET)
    precision_tbr = pm4py.precision_token_based_replay(log, net, im, fm)
    print("precision", precision_tbr)
    # BUG FIX: the original computed this value with precision_token_based_replay
    # but printed it as "fitness"; call the fitness function instead.
    fitness_tbr = pm4py.fitness_token_based_replay(log, net, im, fm)
    print("fitness", fitness_tbr)
    print(pm4py.check_soundness(net, im, fm))


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/bpmn_js_visualization.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pm4py
|
2 |
+
import importlib.util
|
3 |
+
|
4 |
+
|
5 |
+
def execute_script():
    """Display the BPMN model mined from the running-example log using the
    dagrejs (BPMN-JS style) visualization variant."""
    event_log = pm4py.read_xes("../tests/input_data/running-example.xes")

    model = pm4py.discover_bpmn_inductive(event_log)

    if importlib.util.find_spec("graphviz"):
        pm4py.view_bpmn(model, variant_str="dagrejs")


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/case_overlap_stat.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pm4py
|
2 |
+
import os
|
3 |
+
from pm4py.statistics.overlap.cases.log import get as case_overlap_get
|
4 |
+
|
5 |
+
|
6 |
+
def execute_script():
    """Compute and print the work-in-progress (WIP) statistic of the receipt log.

    The WIP statistic associates to each case the number of cases that were
    open during that case's lifecycle.
    """
    event_log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
    wip_stat = case_overlap_get.apply(event_log)
    print(wip_stat)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/consecutive_act_case_grouping_filter.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pm4py
|
2 |
+
from pm4py.algo.filtering.pandas.consecutive_act_case_grouping import consecutive_act_case_grouping_filter
|
3 |
+
|
4 |
+
|
5 |
+
def execute_script():
    """Apply the consecutive-activities case-grouping filter to the receipt
    log and print the dataframe before and after filtering."""
    frame = pm4py.read_xes("../tests/input_data/receipt.xes")
    print(frame)
    filtered = consecutive_act_case_grouping_filter.apply(frame)
    print(filtered)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/corr_mining.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pm4py.util import constants, pandas_utils
|
2 |
+
|
3 |
+
from pm4py.algo.discovery.correlation_mining import algorithm as correlation_miner
|
4 |
+
from pm4py.objects.log.util import dataframe_utils
|
5 |
+
from pm4py.statistics.service_time.pandas import get as soj_time_get
|
6 |
+
from pm4py.statistics.start_activities.pandas import get as sa_get
|
7 |
+
from pm4py.statistics.end_activities.pandas import get as ea_get
|
8 |
+
from examples import examples_conf
|
9 |
+
import importlib.util
|
10 |
+
|
11 |
+
|
12 |
+
def execute_script():
    """Run the correlation miner on an interval event log read from CSV and
    visualize the resulting frequency and performance DFGs."""
    df = pandas_utils.read_csv("../tests/input_data/interval_event_log.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT)
    act_count = dict(df["concept:name"].value_counts())
    parameters = {}
    # the interval log carries both a start and a completion timestamp per event
    parameters[constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = "start_timestamp"
    parameters[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = "time:timestamp"
    parameters["format"] = examples_conf.TARGET_IMG_FORMAT
    start_activities = sa_get.get_start_activities(df, parameters=parameters)
    end_activities = ea_get.get_end_activities(df, parameters=parameters)
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities
    soj_time = soj_time_get.apply(df, parameters=parameters)
    dfg, performance_dfg = correlation_miner.apply(df, variant=correlation_miner.Variants.CLASSIC,
                                                   parameters=parameters)

    # BUG FIX: the module name was misspelled as "grapviz", so find_spec always
    # returned None and the visualization branch could never execute.
    if importlib.util.find_spec("graphviz"):
        from pm4py.visualization.dfg import visualizer as dfg_vis
        gviz_freq = dfg_vis.apply(dfg, activities_count=act_count, serv_time=soj_time, variant=dfg_vis.Variants.FREQUENCY,
                                  parameters=parameters)
        dfg_vis.view(gviz_freq)
        gviz_perf = dfg_vis.apply(performance_dfg, activities_count=act_count, serv_time=soj_time,
                                  variant=dfg_vis.Variants.PERFORMANCE,
                                  parameters=parameters)
        dfg_vis.view(gviz_perf)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/cost_based_dfg.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pm4py
|
2 |
+
import os
|
3 |
+
from pm4py.algo.discovery.dfg.adapters.pandas import df_statistics
|
4 |
+
from examples import examples_conf
|
5 |
+
import importlib.util
|
6 |
+
|
7 |
+
|
8 |
+
def execute_script():
    """Compute a cost-based DFG (edges weighted by the 'amount' attribute)
    from the road traffic log and visualize it."""
    log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "roadtraffic100traces.xes"), return_legacy_log_object=False)
    cost_based_dfg = df_statistics.get_dfg_graph(log, measure="cost", cost_attribute="amount")

    # BUG FIX: "grapviz" was misspelled, so find_spec always returned None and
    # the visualization was silently skipped.
    if importlib.util.find_spec("graphviz"):
        from pm4py.visualization.dfg import visualizer as dfg_visualizer
        gviz = dfg_visualizer.apply(cost_based_dfg, variant=dfg_visualizer.Variants.COST, parameters={"format": examples_conf.TARGET_IMG_FORMAT})
        dfg_visualizer.view(gviz)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/cycle_time.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import pm4py
|
4 |
+
from pm4py.statistics.traces.cycle_time.log import get as cycle_time_get
|
5 |
+
|
6 |
+
|
7 |
+
def execute_script():
    """Print the cycle time of the interval event log, using its dedicated
    start/completion timestamp attributes."""
    event_log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "interval_event_log.xes"))
    params = {cycle_time_get.Parameters.START_TIMESTAMP_KEY: "start_timestamp",
              cycle_time_get.Parameters.TIMESTAMP_KEY: "time:timestamp"}
    print(cycle_time_get.apply(event_log, parameters=params))


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/data_petri_nets.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import pm4py
|
4 |
+
from pm4py.algo.conformance.alignments.petri_net import algorithm as alignments
|
5 |
+
from pm4py.objects.petri_net.data_petri_nets import semantics
|
6 |
+
from pm4py.objects.petri_net.data_petri_nets.data_marking import DataMarking
|
7 |
+
from pm4py.objects.log.importer.xes import importer as xes_importer
|
8 |
+
from examples import examples_conf
|
9 |
+
import importlib.util
|
10 |
+
|
11 |
+
|
12 |
+
def get_trans_by_name(net, name):
    """Return the first transition of *net* whose name equals *name*,
    or None when no such transition exists."""
    matches = (t for t in net.transitions if t.name == name)
    return next(matches, None)
|
17 |
+
|
18 |
+
|
19 |
+
def execute_script():
    """Replay each aligned trace on a data Petri net, evaluating guards.

    Imports the road traffic log and a data Petri net (a net whose transitions
    may carry "guard" properties), computes alignments, and then steps through
    every alignment checking — via the data-aware semantics — whether each
    model transition is actually enabled given the current event's attributes.
    """
    log = xes_importer.apply(os.path.join("..", "tests", "input_data", "roadtraffic100traces.xes"))
    # auto_guess_final_marking=True: derive a final marking when the PNML
    # does not declare one explicitly
    net, im, fm = pm4py.read_pnml(os.path.join("..", "tests", "input_data", "data_petri_net.pnml"), auto_guess_final_marking=True)

    if importlib.util.find_spec("graphviz"):
        pm4py.view_petri_net(net, im, fm, format=examples_conf.TARGET_IMG_FORMAT)

    # ret_tuple_as_trans_desc=True makes each alignment move a pair of tuples,
    # so the model-side transition NAME (x[0][1]) is available for lookup
    aligned_traces = alignments.apply(log, net, im, fm, variant=alignments.Variants.VERSION_DIJKSTRA_LESS_MEMORY, parameters={"ret_tuple_as_trans_desc": True})
    for index, trace in enumerate(log):
        aligned_trace = aligned_traces[index]
        # al: list of (log-side label, model transition object or None) moves
        al = [(x[0][0], get_trans_by_name(net, x[0][1])) for x in aligned_trace["alignment"]]
        m = DataMarking(im)
        idx = 0  # position of the current event inside the trace
        for el in al:
            if el[1] is not None:
                # transitions enabled under the data semantics for the current
                # event; min(...) clamps idx so moves past the last event reuse it
                en_t = semantics.enabled_transitions(net, m, trace[min(idx, len(trace) - 1)])
                if el[1] in en_t:
                    if "guard" in el[1].properties:
                        print(el[1], "GUARD SATISFIED", el[1].properties["guard"], m)
                    m = semantics.execute(el[1], net, m, trace[min(idx, len(trace) - 1)])
                else:
                    print("TRANSITION UNAVAILABLE! Guards are blocking")
            if el[0] != ">>":
                # ">>" on the log side marks a model-only move; any other move
                # consumes an event, so advance the event index
                idx = idx + 1


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/dataframe_prefix_and_fea_extraction.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pm4py.objects.log.util import dataframe_utils
|
2 |
+
from pm4py.util import pandas_utils, constants
|
3 |
+
|
4 |
+
|
5 |
+
def execute_script():
    """Load the receipt CSV, keep only the length-5 prefix of every case, and
    run the automatic feature extraction on the resulting dataframe."""
    # load the dataframe and parse its timestamp columns
    frame = pandas_utils.read_csv("../tests/input_data/receipt.csv")
    frame = dataframe_utils.convert_timestamp_columns_in_df(frame, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT)
    print(frame)
    # number the events inside each case, then keep only prefixes of length 5
    frame = pandas_utils.insert_ev_in_tr_index(frame, case_id="case:concept:name", column_name="@@index_in_trace")
    frame = frame[frame["@@index_in_trace"] <= 5]
    print(frame)
    # automatic feature extraction on the prefix dataframe
    fea_df = dataframe_utils.automatic_feature_extraction_df(frame)
    print("\nfea_df =")
    print(fea_df)
    print(fea_df.columns)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/dec_treplay_imdf.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
|
4 |
+
from pm4py.objects.log.importer.xes import importer as xes_importer
|
5 |
+
from pm4py.objects.conversion.process_tree import converter as process_tree_converter
|
6 |
+
from examples import examples_conf
|
7 |
+
import importlib.util
|
8 |
+
|
9 |
+
|
10 |
+
def execute_script():
    """Mine a Petri net from the receipt log with the inductive miner and show
    both a performance-decorated and a frequency-decorated visualization."""
    # import the log
    log_path = os.path.join("..", "tests", "input_data", "receipt.xes")
    log = xes_importer.apply(log_path)
    # discover a process tree and convert it to an accepting Petri net
    process_tree = inductive_miner.apply(log)
    net, initial_marking, final_marking = process_tree_converter.apply(process_tree)

    if importlib.util.find_spec("graphviz"):
        from pm4py.visualization.petri_net import visualizer as pn_vis
        # performance decoration, aggregated with the mean
        perf_params = {pn_vis.Variants.PERFORMANCE.value.Parameters.AGGREGATION_MEASURE: "mean",
                       pn_vis.Variants.PERFORMANCE.value.Parameters.FORMAT: examples_conf.TARGET_IMG_FORMAT}
        perf_gviz = pn_vis.apply(net, initial_marking, final_marking, log=log,
                                 variant=pn_vis.Variants.PERFORMANCE, parameters=perf_params)
        pn_vis.view(perf_gviz)
        # frequency decoration
        freq_params = {pn_vis.Variants.FREQUENCY.value.Parameters.FORMAT: examples_conf.TARGET_IMG_FORMAT}
        freq_gviz = pn_vis.apply(net, initial_marking, final_marking, log=log,
                                 variant=pn_vis.Variants.FREQUENCY, parameters=freq_params)
        pn_vis.view(freq_gviz)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/decisiontree_align_example.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pm4py.objects.log.importer.xes import importer as xes_importer
|
2 |
+
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
|
3 |
+
from pm4py.algo.decision_mining import algorithm
|
4 |
+
from pm4py.objects.conversion.process_tree import converter as process_tree_converter
|
5 |
+
from examples import examples_conf
|
6 |
+
import os
|
7 |
+
import importlib.util
|
8 |
+
|
9 |
+
|
10 |
+
def execute_script():
    """Learn, via alignments, a decision tree for one specific decision point
    (place p_10) of the model mined from the running-example log, and show it."""
    event_log = xes_importer.apply(os.path.join("..", "tests", "input_data", "running-example.xes"))
    tree = inductive_miner.apply(event_log)
    net, im, fm = process_tree_converter.apply(tree)

    if not (importlib.util.find_spec("sklearn") and importlib.util.find_spec("graphviz")):
        return

    # a decision point must be specified; place p_10 is suitable in this net
    classifier, feat_names, target_classes = algorithm.get_decision_tree(event_log, net, im, fm, decision_point="p_10")

    # visualize the decision tree
    from pm4py.visualization.decisiontree import visualizer as visualizer
    viz = visualizer.apply(classifier, feat_names, target_classes,
                           parameters={visualizer.Variants.CLASSIC.value.Parameters.FORMAT: examples_conf.TARGET_IMG_FORMAT})
    visualizer.view(viz)


if __name__ == "__main__":
    execute_script()
|
pm4py/examples/decisiontree_trivial_example.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
from pm4py.util import ml_utils
|
4 |
+
from pm4py.objects.log.importer.xes import importer as xes_importer
|
5 |
+
from pm4py.objects.log.util import get_class_representation
|
6 |
+
from pm4py.algo.transformation.log_to_features import algorithm as log_to_features
|
7 |
+
from examples import examples_conf
|
8 |
+
import importlib.util
|
9 |
+
|
10 |
+
|
11 |
+
def _mine_and_view_tree(data, target, feature_names, classes):
    """Fit a depth-7 decision tree on (data, target) and display it, when both
    sklearn and graphviz are available."""
    if importlib.util.find_spec("sklearn") and importlib.util.find_spec("graphviz"):
        # mine the decision tree given 'data' and 'target'
        clf = ml_utils.DecisionTreeClassifier(max_depth=7)
        clf.fit(data, target)

        # visualize the decision tree
        from pm4py.visualization.decisiontree import visualizer as dt_vis
        gviz = dt_vis.apply(clf, feature_names, classes, parameters={dt_vis.Variants.CLASSIC.value.Parameters.FORMAT: examples_conf.TARGET_IMG_FORMAT})
        dt_vis.view(gviz)


def execute_script():
    """Extract a feature table from an event log and learn two decision trees:
    one predicting the final activity, one predicting long vs. short cases."""
    log_path = os.path.join("..", "tests", "input_data", "roadtraffic50traces.xes")
    # log_path = os.path.join("..", "tests", "input_data", "receipt.xes")
    log = xes_importer.apply(log_path)
    # default trace-based feature representation of the event log
    data, feature_names = log_to_features.apply(log, variant=log_to_features.Variants.TRACE_BASED)

    # classes given by the final concept:name value (end activity)
    target, classes = get_class_representation.get_class_representation_by_str_ev_attr_value_value(log, "concept:name")
    _mine_and_view_tree(data, target, feature_names, classes)

    # classes given by trace duration (threshold between the two classes = 200D)
    target, classes = get_class_representation.get_class_representation_by_trace_duration(log, 2 * 8640000)
    _mine_and_view_tree(data, target, feature_names, classes)


if __name__ == "__main__":
    execute_script()
|