diff --git "a/notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb" "b/notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb" new file mode 100644--- /dev/null +++ "b/notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb" @@ -0,0 +1,336 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "632c2d7d-ada5-4708-82d4-1a414141966b", + "metadata": {}, + "source": [ + "# Generating traces using pt_gen\n", + "Comparison with log from [running-example.xes](https://pm4py.fit.fraunhofer.de/static/assets/examples/running-example.xes)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1e561eed-1b37-4e60-812b-75095fe50c44", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "102135589d3b461ea7eef5fbf0d254e3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "parsing log, completed traces :: 0%| | 0/6 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Bind the package name itself: later cells call pm4py.<function> directly,\n", + "# and the from-imports below do not make the name pm4py available.\n", + "import pm4py\n", + "from pm4py.algo.discovery.inductive import algorithm as inductive_miner\n", + "from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner\n", + "from pm4py.visualization.petri_net import visualizer as pn_visualizer\n", + "from pm4py import save_vis_petri_net\n", + "\n", + "#OUTPUT_PATH =\"../plots/\"+INPUT_PATH.rsplit(\"/\",1)[1].rsplit(\".\",1)[0]+\"_all.png\"\n", + "# NOTE(review): `log` is used below but never assigned in this notebook - load the event log before this point.\n", + "# Discover a Petri net from the log and render it with the FREQUENCY visualizer variant as PNG.\n", + "net, initial_marking, final_marking = inductive_miner.apply(log,\n", + " parameters={heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 1,\n", + " pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: \"png\"})\n", + "parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: \"png\"}\n", + "fig_pt = pn_visualizer.apply(net, initial_marking, final_marking, parameters=parameters, variant=pn_visualizer.Variants.FREQUENCY, log=log)\n", + "#fig_pt = pn_visualizer.apply(net, initial_marking, final_marking, parameters=parameters, log=log)\n", + "\n", + "\n", +
"pn_visualizer.view(fig_pt)\n", + "#print(\"Saved in\", OUTPUT_PATH)\n", + "#save_vis_petri_net(net, initial_marking, final_marking, OUTPUT_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b5032ed4-3a34-4741-b433-2a8dd45aeffe", + "metadata": {}, + "outputs": [], + "source": [ + "runex_tree =pm4py.discover_process_tree_inductive(log)\n", + "#pm4py.view_process_tree(runex_tree)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6410555d-b0ef-4f72-bd45-d7f95ac4d660", + "metadata": {}, + "outputs": [], + "source": [ + "# Event log stats\n", + "import statistics\n", + "\n", + "import numpy as np\n", + "import pm4py\n", + "from pm4py.objects.conversion.log import converter as log_converter\n", + "\n", + "\n", + "def get_length_trace_descriptors(log):\n", + "    \"\"\"Describe the distribution of trace lengths (events per trace) in a log.\n", + "\n", + "    Parameters\n", + "    ----------\n", + "    log : iterable of sized traces\n", + "        Event log to describe; every trace must support len().\n", + "\n", + "    Returns\n", + "    -------\n", + "    tuple\n", + "        (min, max, mode, std) of the trace lengths, where std is the\n", + "        population standard deviation rounded to two decimals.\n", + "\n", + "    Raises\n", + "    ------\n", + "    ValueError\n", + "        If the log contains no traces.\n", + "    \"\"\"\n", + "    # Measure each trace directly (as get_nOf_events does); no variant\n", + "    # grouping is needed just to read trace lengths.\n", + "    trace_len = [len(trace) for trace in log]\n", + "    if not trace_len:\n", + "        raise ValueError(\"cannot describe the trace lengths of an empty log\")\n", + "\n", + "    # float() keeps the printed tuple free of numpy scalar reprs.\n", + "    std = float(round(np.std(trace_len), 2))\n", + "    return min(trace_len), max(trace_len), statistics.mode(trace_len), std\n", + "\n", + "\n", + "def get_nOf_events(log):\n", + "    \"\"\"Return the total number of events summed over all traces of the log.\"\"\"\n", + "    return sum(len(trace) for trace in log)\n", + "\n", + "\n", + "def print_stats(log):\n", + "    \"\"\"Print size statistics of an event log.\n", + "\n", + "    Prints the number of events, traces, variants and distinct\n", + "    concept:name values, followed by the trace-length descriptors\n", + "    returned by get_length_trace_descriptors.\n", + "    \"\"\"\n", + "    # The data-frame view is only used to count distinct activity names.\n", + "    df = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME)\n", + "    print(\"Number of events:\", get_nOf_events(log))\n", + "    print(\"Number of traces:\", len(log))\n", + "    print(\"Number of variants:\", len(pm4py.stats.get_variants_as_tuples(log)))\n", + "    print(\"Number of activities:\", len(df['concept:name'].unique()))\n", + "    print(\"Length of traces descriptors (min, max, mode, std):\", get_length_trace_descriptors(log))"
+ ] + }, + { + "cell_type": "raw", + "id": "4ed3fb0f-9298-467f-997a-28ff06489afe", + "metadata": {}, + "source": [ + " Parameters\n", + " --------------\n", + " parameters\n", + " Parameters of the algorithm, according to the paper:\n", + " - Parameters.MODE: most frequent number of visible activities\n", + " - Parameters.MIN: minimum number of visible activities\n", + " - Parameters.MAX: maximum number of visible activities\n", + " - Parameters.SEQUENCE: probability to add a sequence operator to tree\n", + " - Parameters.CHOICE: probability to add a choice operator to tree\n", + " - Parameters.PARALLEL: probability to add a parallel operator to tree\n", + " - Parameters.LOOP: probability to add a loop operator to tree\n", + " - Parameters.OR: probability to add an or operator to tree\n", + " - Parameters.SILENT: probability to add silent activity to a choice or loop operator\n", + " - Parameters.DUPLICATE: probability to duplicate an activity label\n", + " - Parameters.NO_MODELS: number of trees to generate from model population" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7f81da12-8d86-4b96-9660-f89973ae4c34", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4b5225db83b44a5a82b9618e704cf24a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "replaying log with TBR, completed variants :: 0%| | 0/244 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Play out a log from runex_tree, then discover a Petri net from that log\n", + "# and view it with the FREQUENCY visualizer variant.\n", + "# NOTE(review): the heuristics-miner and visualizer keys passed to inductive_miner.apply\n", + "# look unrelated to the inductive miner - confirm they have any effect.\n", + "playout_log = pm4py.sim.play_out(runex_tree)\n", + "\n", + "net, initial_marking, final_marking = inductive_miner.apply(playout_log,\n", + " parameters={heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 1,\n", + " pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: \"png\"})\n", + "parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: \"png\"}\n", + "fig_pt = pn_visualizer.apply(net, initial_marking, final_marking, 
parameters=parameters, variant=pn_visualizer.Variants.FREQUENCY, log=playout_log)\n", + "#fig_pt = pn_visualizer.apply(net, initial_marking, final_marking, parameters=parameters, log=log)\n", + "\n", + "\n", + "pn_visualizer.view(fig_pt)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "42a20016-2596-45ea-9278-7a3dc810efbf", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d19f04b34ac745c792ba8cb0db155c97", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "replaying log with TBR, completed variants :: 0%| | 0/699 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from pm4py.algo.simulation.tree_generator import algorithm as tree_generator\n", + "\n", + "# pt_gen size settings: MIN, MAX and MODE number of visible activities\n", + "# (meanings as listed in the raw parameter cell).\n", + "PARAMS = {tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MIN: 5,\n", + " tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MAX: 30,\n", + " tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MODE: 10}\n", + "process_tree = pm4py.generate_process_tree(parameters=PARAMS)\n", + "#pm4py.view_process_tree(process_tree)\n", + "# Play out a log from the generated tree, then discover and view a Petri net from that log.\n", + "ptgen_log = pm4py.sim.play_out(process_tree)\n", + "\n", + "net, initial_marking, final_marking = inductive_miner.apply(ptgen_log,\n", + " parameters={heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 1,\n", + " pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: \"png\"})\n", + "parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: \"png\"}\n", + "fig_pt = pn_visualizer.apply(net, initial_marking, final_marking, parameters=parameters, variant=pn_visualizer.Variants.FREQUENCY, log=ptgen_log)\n", + "#fig_pt = pn_visualizer.apply(net, initial_marking, final_marking, parameters=parameters, log=log)\n", + "\n", + "\n", + "pn_visualizer.view(fig_pt)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "aaeb64ca-b270-40d9-9f2f-8203c6dcede6", + "metadata": {}, + "outputs": [ 
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of events: 42\n", + "Number of traces: 6\n", + "Number of variants: 6\n", + "Number of activities: 8\n", + "Length of traces descriptors (min, max, mode, std): (5, 13, 5, 3.06)\n", + "___________\n", + "Number of events: 9012\n", + "Number of traces: 1000\n", + "Number of variants: 244\n", + "Number of activities: 8\n", + "Length of traces descriptors (min, max, mode, std): (5, 41, 5, 5.57)\n", + "___________\n", + "Number of events: 40162\n", + "Number of traces: 1000\n", + "Number of variants: 699\n", + "Number of activities: 21\n", + "Length of traces descriptors (min, max, mode, std): (1, 318, 1, 47.28)\n" + ] + } + ], + "source": [ + "# Print the same statistics for the three logs, in order: log, playout_log\n", + "# (played out from runex_tree) and ptgen_log (played out from the generated tree).\n", + "print_stats(log)\n", + "print(\"___________\")\n", + "print_stats(playout_log)\n", + "print(\"___________\")\n", + "print_stats(ptgen_log)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21259f33-162c-497c-9368-dd2320919f72", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}