Spaces:
Running
Running
File size: 15,704 Bytes
e60e568 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 |
'''
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
PM4Py is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PM4Py is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
'''
from typing import Tuple, Dict, Optional
from pm4py.objects.bpmn.obj import BPMN
from pm4py.objects.log.obj import EventLog
from pm4py.objects.ocel.obj import OCEL
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.objects.process_tree.obj import ProcessTree
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.util import dataframe_utils
from pm4py.util import constants
import os
from pandas import DataFrame
import importlib.util
from typing import Union
INDEX_COLUMN = "@@index"
__doc__ = """
The ``pm4py.read`` module contains all functionality related to reading files/objects from disk.
"""
def read_xes(
    file_path: str,
    variant: Optional[str] = None,
    return_legacy_log_object: bool = constants.DEFAULT_READ_XES_LEGACY_OBJECT,
    encoding: str = constants.DEFAULT_ENCODING,
    **kwargs
) -> Union[DataFrame, EventLog]:
    """
    Reads an event log stored in XES format (see `xes-standard <https://xes-standard.org/>`_).

    Unless ``return_legacy_log_object`` is set, an ``EventLog`` produced by the importer
    is converted to a table (``pandas.DataFrame``) view of the event log.

    :param file_path: file path of the event log (``.xes`` file) on disk
    :param variant: the variant of the importer to use. When ``None``, the value of ``constants.DEFAULT_XES_PARSER`` is used. Recognized values: "iterparse" / "lxml" => traditional XML parser; "iterparse_20" (alias "iterparse20") => XES 2.0 importer; "iterparse_mem_compressed"; "line_by_line" => text-based line-by-line importer; "chunk_regex" => chunk-of-bytes importer; "rustxes". Any other value falls back to the "chunk_regex" importer.
    :param return_legacy_log_object: boolean value enabling returning a log object instead of a dataframe
    :param encoding: the encoding to be used
    :param kwargs: additional entries forwarded to the importer inside its ``parameters`` dictionary
    :rtype: ``Union[DataFrame, EventLog]``
    :raises Exception: if ``file_path`` does not exist

    .. code-block:: python3

        import pm4py

        log = pm4py.read_xes("<path_to_xes_file>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    if variant is None:
        variant = constants.DEFAULT_XES_PARSER

    from pm4py.objects.log.importer.xes import importer as xes_importer

    # Map the user-facing variant strings to the attribute names of
    # ``xes_importer.Variants``. Names (rather than enum members) are stored so
    # that only the selected attribute is looked up on the importer.
    variant_attribute_names = {
        "iterparse_20": "ITERPARSE_20",
        # Spelling historically advertised by this function's documentation.
        "iterparse20": "ITERPARSE_20",
        "iterparse": "ITERPARSE",
        "lxml": "ITERPARSE",
        "iterparse_mem_compressed": "ITERPARSE_MEM_COMPRESSED",
        "line_by_line": "LINE_BY_LINE",
        "chunk_regex": "CHUNK_REGEX",
        "rustxes": "RUSTXES",
    }
    # Unknown variant strings keep the historical fallback to CHUNK_REGEX.
    v = getattr(xes_importer.Variants, variant_attribute_names.get(variant, "CHUNK_REGEX"))

    # Work on a copy so the caller's keyword dictionary is never mutated.
    parameters = dict(kwargs)
    parameters["encoding"] = encoding
    parameters["return_legacy_log_object"] = return_legacy_log_object

    log = xes_importer.apply(file_path, variant=v, parameters=parameters)

    # Some importers hand back an EventLog; convert it unless the caller
    # explicitly asked for the legacy object.
    if isinstance(log, EventLog) and not return_legacy_log_object:
        log = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME)

    return log
def read_pnml(
    file_path: str,
    auto_guess_final_marking: bool = False,
    encoding: str = constants.DEFAULT_ENCODING,
) -> Tuple[PetriNet, Marking, Marking]:
    """
    Reads a Petri net object from a .pnml file.

    The result is a triple made of:

    1. the Petri net, encoded as a ``PetriNet`` object
    #. the initial marking
    #. the final marking

    :param file_path: file path of the Petri net model (``.pnml`` file) on disk
    :param auto_guess_final_marking: flag forwarded to the PNML importer under the ``auto_guess_final_marking`` parameter
    :param encoding: the encoding to be used
    :rtype: ``Tuple[PetriNet, Marking, Marking]``
    :raises Exception: if ``file_path`` does not exist

    .. code-block:: python3

        import pm4py

        pn = pm4py.read_pnml("<path_to_pnml_file>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    from pm4py.objects.petri_net.importer import importer as pnml_importer

    importer_parameters = {
        "auto_guess_final_marking": auto_guess_final_marking,
        "encoding": encoding,
    }
    petri_net, initial_marking, final_marking = pnml_importer.apply(
        file_path, parameters=importer_parameters
    )
    return petri_net, initial_marking, final_marking
def read_ptml(file_path: str, encoding: str = constants.DEFAULT_ENCODING) -> ProcessTree:
    """
    Reads a process tree object from a .ptml file.

    :param file_path: file path of the process tree object on disk
    :param encoding: the encoding to be used
    :rtype: ``ProcessTree``
    :raises Exception: if ``file_path`` does not exist

    .. code-block:: python3

        import pm4py

        process_tree = pm4py.read_ptml("<path_to_ptml_file>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    from pm4py.objects.process_tree.importer import importer as tree_importer

    importer_parameters = {"encoding": encoding}
    return tree_importer.apply(file_path, parameters=importer_parameters)
def read_dfg(
    file_path: str,
    encoding: str = constants.DEFAULT_ENCODING,
) -> Tuple[Dict[Tuple[str, str], int], Dict[str, int], Dict[str, int]]:
    """
    Reads a DFG object from a .dfg file.

    The result is a triple made of:

    1. the DFG, encoded as a ``Dict[Tuple[str,str],int]``, s.t. ``DFG[('a','b')]=k`` means that activity ``'a'`` is directly followed by activity ``'b'`` a total of ``k`` times in the log
    #. the start activities, encoded as a ``Dict[str,int]``, s.t. ``S['a']=k`` means that activity ``'a'`` starts ``k`` traces in the event log
    #. the end activities, encoded as a ``Dict[str,int]``, s.t. ``E['z']=k`` means that activity ``'z'`` ends ``k`` traces in the event log

    :param file_path: file path of the dfg model on disk
    :param encoding: the encoding to be used
    :rtype: ``Tuple[Dict[Tuple[str,str],int], Dict[str,int], Dict[str,int]]``
    :raises Exception: if ``file_path`` does not exist

    .. code-block:: python3

        import pm4py

        dfg = pm4py.read_dfg("<path_to_dfg_file>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    from pm4py.objects.dfg.importer import importer as dfg_importer

    importer_parameters = {"encoding": encoding}
    graph, starts, ends = dfg_importer.apply(file_path, parameters=importer_parameters)
    return graph, starts, ends
def read_bpmn(file_path: str, encoding: str = constants.DEFAULT_ENCODING) -> BPMN:
    """
    Reads a BPMN model from a .bpmn file.

    :param file_path: file path of the bpmn model
    :param encoding: the encoding to be used
    :rtype: ``BPMN``
    :raises Exception: if ``file_path`` does not exist

    .. code-block:: python3

        import pm4py

        bpmn = pm4py.read_bpmn('<path_to_bpmn_file>')
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    from pm4py.objects.bpmn.importer import importer as bpmn_importer

    importer_parameters = {"encoding": encoding}
    return bpmn_importer.apply(file_path, parameters=importer_parameters)
def read_ocel(
    file_path: str,
    objects_path: Optional[str] = None,
    encoding: str = constants.DEFAULT_ENCODING,
) -> OCEL:
    """
    Reads an object-centric event log from a file (see: http://www.ocel-standard.org/).

    The reader is chosen from the (case-insensitive) end of ``file_path``:
    ``csv``, ``jsonocel``, ``xmlocel`` or ``.sqlite``.

    :param file_path: file path of the object-centric event log
    :param objects_path: [Optional] file path from which the objects dataframe should be read (only passed on to the CSV reader)
    :param encoding: the encoding to be used
    :rtype: ``OCEL``
    :raises Exception: if ``file_path`` does not exist, or its suffix matches none of the supported formats

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel("<path_to_ocel_file>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    lowered_path = file_path.lower()

    if lowered_path.endswith("csv"):
        return read_ocel_csv(file_path, objects_path, encoding=encoding)
    if lowered_path.endswith("jsonocel"):
        return read_ocel_json(file_path, encoding=encoding)
    if lowered_path.endswith("xmlocel"):
        return read_ocel_xml(file_path, encoding=encoding)
    if lowered_path.endswith(".sqlite"):
        return read_ocel_sqlite(file_path, encoding=encoding)

    raise Exception("unsupported file format")
def read_ocel_csv(
    file_path: str,
    objects_path: Optional[str] = None,
    encoding: str = constants.DEFAULT_ENCODING,
) -> OCEL:
    """
    Reads an object-centric event log from a CSV file (see: http://www.ocel-standard.org/).

    :param file_path: file path of the object-centric event log (.csv)
    :param objects_path: [Optional] file path from which the objects dataframe should be read
    :param encoding: the encoding to be used
    :rtype: ``OCEL``
    :raises Exception: if ``file_path`` does not exist

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel_csv("<path_to_ocel_file.csv>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    from pm4py.objects.ocel.importer.csv import importer as csv_importer

    importer_parameters = {"encoding": encoding}
    return csv_importer.apply(
        file_path,
        objects_path=objects_path,
        parameters=importer_parameters,
    )
def read_ocel_json(file_path: str, encoding: str = constants.DEFAULT_ENCODING) -> OCEL:
    """
    Reads an object-centric event log from a JSON-OCEL file (see: http://www.ocel-standard.org/).

    The ``CLASSIC`` variant of the JSON-OCEL importer is used.

    :param file_path: file path of the object-centric event log (.jsonocel)
    :param encoding: the encoding to be used
    :rtype: ``OCEL``
    :raises Exception: if ``file_path`` does not exist

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel_json("<path_to_ocel_file.jsonocel>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    from pm4py.objects.ocel.importer.jsonocel import importer as jsonocel_importer

    importer_parameters = {"encoding": encoding}
    return jsonocel_importer.apply(
        file_path,
        variant=jsonocel_importer.Variants.CLASSIC,
        parameters=importer_parameters,
    )
def read_ocel_xml(file_path: str, encoding: str = constants.DEFAULT_ENCODING) -> OCEL:
    """
    Reads an object-centric event log from a XML-OCEL file (see: http://www.ocel-standard.org/).

    The ``CLASSIC`` variant of the XML-OCEL importer is used.

    :param file_path: file path of the object-centric event log (.xmlocel)
    :param encoding: the encoding to be used
    :rtype: ``OCEL``
    :raises Exception: if ``file_path`` does not exist

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel_xml("<path_to_ocel_file.xmlocel>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    from pm4py.objects.ocel.importer.xmlocel import importer as xmlocel_importer

    importer_parameters = {"encoding": encoding}
    return xmlocel_importer.apply(
        file_path,
        variant=xmlocel_importer.Variants.CLASSIC,
        parameters=importer_parameters,
    )
def read_ocel_sqlite(file_path: str, encoding: str = constants.DEFAULT_ENCODING) -> OCEL:
    """
    Reads an object-centric event log from a SQLite database (see: http://www.ocel-standard.org/).

    The ``PANDAS_IMPORTER`` variant of the SQLite importer is used.

    :param file_path: file path of the SQLite database (.sqlite)
    :param encoding: the encoding to be used
    :rtype: ``OCEL``
    :raises Exception: if ``file_path`` does not exist

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel_sqlite("<path_to_ocel_file.sqlite>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    from pm4py.objects.ocel.importer.sqlite import importer as sqlite_importer

    importer_parameters = {"encoding": encoding}
    return sqlite_importer.apply(
        file_path,
        variant=sqlite_importer.Variants.PANDAS_IMPORTER,
        parameters=importer_parameters,
    )
def read_ocel2(
    file_path: str,
    variant_str: Optional[str] = None,
    encoding: str = constants.DEFAULT_ENCODING,
) -> OCEL:
    """
    Reads an OCEL2.0 event log.

    The reader is chosen from the (case-insensitive) end of ``file_path``:
    ``sqlite``, ``xml`` / ``xmlocel`` or ``json`` / ``jsonocel``.

    :param file_path: path to the OCEL2.0 event log
    :param variant_str: (optional) specification of the importer variant to be used
    :param encoding: the encoding to be used
    :rtype: ``OCEL``
    :raises Exception: if ``file_path`` does not exist, or its suffix matches none of the supported formats

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel2("<path_to_ocel_file>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    lowered_path = file_path.lower()

    if lowered_path.endswith("sqlite"):
        return read_ocel2_sqlite(file_path, variant_str=variant_str, encoding=encoding)
    if lowered_path.endswith(("xml", "xmlocel")):
        return read_ocel2_xml(file_path, variant_str=variant_str, encoding=encoding)
    if lowered_path.endswith(("json", "jsonocel")):
        return read_ocel2_json(file_path, variant_str=variant_str, encoding=encoding)

    # Previously an unrecognized suffix fell through and silently returned
    # None; fail loudly instead, with the same error that read_ocel raises.
    raise Exception("unsupported file format")
def read_ocel2_json(
    file_path: str,
    variant_str: Optional[str] = None,
    encoding: str = constants.DEFAULT_ENCODING,
) -> OCEL:
    """
    Reads an OCEL2.0 event log from a JSON-OCEL(2) file.

    :param file_path: path to the JSON file
    :param variant_str: (optional) specification of the importer variant to be used; ``"ocel20_rustxes"`` selects the ``OCEL20_RUSTXES`` importer variant, any other value selects ``OCEL20_STANDARD``
    :param encoding: the encoding to be used
    :rtype: ``OCEL``
    :raises Exception: if ``file_path`` does not exist

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel2_json("<path_to_ocel_file.jsonocel>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    from pm4py.objects.ocel.importer.jsonocel import importer as jsonocel_importer

    if variant_str == "ocel20_rustxes":
        chosen_variant = jsonocel_importer.Variants.OCEL20_RUSTXES
    else:
        chosen_variant = jsonocel_importer.Variants.OCEL20_STANDARD

    importer_parameters = {"encoding": encoding}
    return jsonocel_importer.apply(
        file_path,
        variant=chosen_variant,
        parameters=importer_parameters,
    )
def read_ocel2_sqlite(
    file_path: str,
    variant_str: Optional[str] = None,
    encoding: str = constants.DEFAULT_ENCODING,
) -> OCEL:
    """
    Reads an OCEL2.0 event log from a SQLite database.

    The ``OCEL20`` variant of the SQLite importer is always used.

    :param file_path: path to the OCEL2.0 database
    :param variant_str: (optional) specification of the importer variant to be used; accepted for signature parity with the other OCEL2.0 readers but not consulted by this function
    :param encoding: the encoding to be used
    :rtype: ``OCEL``
    :raises Exception: if ``file_path`` does not exist

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel2_sqlite("<path_to_ocel_file.sqlite>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    from pm4py.objects.ocel.importer.sqlite import importer as sqlite_importer

    importer_parameters = {"encoding": encoding}
    return sqlite_importer.apply(
        file_path,
        variant=sqlite_importer.Variants.OCEL20,
        parameters=importer_parameters,
    )
def read_ocel2_xml(
    file_path: str,
    variant_str: Optional[str] = None,
    encoding: str = constants.DEFAULT_ENCODING,
) -> OCEL:
    """
    Reads an OCEL2.0 event log from an XML file.

    :param file_path: path to the OCEL2.0 event log
    :param variant_str: (optional) specification of the importer variant to be used; ``"ocel20_rustxes"`` selects the ``OCEL20_RUSTXES`` importer variant, any other value selects ``OCEL20``
    :param encoding: the encoding to be used
    :rtype: ``OCEL``
    :raises Exception: if ``file_path`` does not exist

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel2_xml("<path_to_ocel_file.xmlocel>")
    """
    if not os.path.exists(file_path):
        raise Exception("File does not exist")

    from pm4py.objects.ocel.importer.xmlocel import importer as xml_importer

    if variant_str == "ocel20_rustxes":
        chosen_variant = xml_importer.Variants.OCEL20_RUSTXES
    else:
        chosen_variant = xml_importer.Variants.OCEL20

    importer_parameters = {"encoding": encoding}
    return xml_importer.apply(
        file_path,
        variant=chosen_variant,
        parameters=importer_parameters,
    )
|