process_mining / pm4py /pm4py /connectors.py
linpershey's picture
Add 'pm4py/' from commit '80970016c5e1e79af7c37df0dd88e17587fe7bcf'
b4ba3ec
raw
history blame
19.2 kB
'''
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
PM4Py is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PM4Py is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
'''
from typing import Optional
import pandas as pd
from pm4py.objects.ocel.obj import OCEL
def extract_log_outlook_mails() -> pd.DataFrame:
"""
Extracts the history of the conversations from the local instance of Microsoft Outlook
running on the current computer.
CASE ID (case:concept:name) => identifier of the conversation
ACTIVITY (concept:name) => activity that is performed in the current item (send e-mail, receive e-mail,
refuse meeting ...)
TIMESTAMP (time:timestamp) => timestamp of creation of the item in Outlook
RESOURCE (org:resource) => sender of the current item
See also:
* https://learn.microsoft.com/en-us/dotnet/api/microsoft.office.interop.outlook.mailitem?redirectedfrom=MSDN&view=outlook-pia#properties_
* https://learn.microsoft.com/en-us/dotnet/api/microsoft.office.interop.outlook.olobjectclass?view=outlook-pia
:rtype: ``pd.DataFrame``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_log_outlook_mails()
"""
from pm4py.algo.connectors.variants import outlook_mail_extractor
return outlook_mail_extractor.apply()
def extract_log_outlook_calendar(email_user: Optional[str] = None, calendar_id: int = 9) -> pd.DataFrame:
"""
Extracts the history of the calendar events (creation, update, start, end)
in a Pandas dataframe from the local Outlook instance running on the current computer.
CASE ID (case:concept:name) => identifier of the meeting
ACTIVITY (concept:name) => one between: Meeting Created, Last Change of Meeting, Meeting Started, Meeting Completed
TIMESTAMP (time:timestamp) => the timestamp of the event
case:subject => the subject of the meeting
:param email_user: (optional) e-mail address from which the (shared) calendar should be extracted
:param calendar_id: identifier of the calendar for the given user (default: 9)
:rtype: ``pd.DataFrame``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_log_outlook_calendar()
dataframe = pm4py.connectors.extract_log_outlook_calendar("[email protected]")
"""
from pm4py.algo.connectors.variants import outlook_calendar
parameters = {}
parameters[outlook_calendar.Parameters.EMAIL_USER] = email_user
parameters[outlook_calendar.Parameters.CALENDAR_ID] = calendar_id
return outlook_calendar.apply(parameters=parameters)
def extract_log_windows_events() -> pd.DataFrame:
"""
Extract a process mining dataframe from all the events recorded in the Windows registry.
CASE ID (case:concept:name) => name of the computer emitting the events.
ACTIVITY (concept:name) => concatenation of the source name of the event and the event identifier
(see https://learn.microsoft.com/en-us/previous-versions/windows/desktop/eventlogprov/win32-ntlogevent)
TIMESTAMP (time:timestamp) => timestamp of generation of the event
RESOURCE (org:resource) => username involved in the event
:rtype: ``pd.DataFrame``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_log_windows_events()
"""
from pm4py.algo.connectors.variants import windows_events
return windows_events.apply()
def extract_log_chrome_history(history_db_path: Optional[str] = None) -> pd.DataFrame:
"""
Extracts a dataframe containing the navigation history of Google Chrome.
Please keep Google Chrome history closed when extracting.
CASE ID (case:concept:name) => an identifier of the profile that has been extracted
ACTIVITY (concept:name) => the complete path of the website, minus the GET arguments
TIMESTAMP (time:timestamp) => the timestamp of visit
:param history_db_path: path to the history DB path of Google Chrome (default: position of the Windows folder)
:rtype: ``pd.DataFrame``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_log_chrome_history()
"""
from pm4py.algo.connectors.variants import chrome_history
parameters = {}
if history_db_path is not None:
parameters[chrome_history.Parameters.HISTORY_DB_PATH] = history_db_path
return chrome_history.apply(parameters=parameters)
def extract_log_firefox_history(history_db_path: Optional[str] = None) -> pd.DataFrame:
"""
Extracts a dataframe containing the navigation history of Mozilla Firefox.
Please keep Google Chrome history closed when extracting.
CASE ID (case:concept:name) => an identifier of the profile that has been extracted
ACTIVITY (concept:name) => the complete path of the website, minus the GET arguments
TIMESTAMP (time:timestamp) => the timestamp of visit
:param history_db_path: path to the history DB path of Mozilla Firefox (default: position of the Windows folder)
:rtype: ``pd.DataFrame``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_log_firefox_history()
"""
from pm4py.algo.connectors.variants import firefox_history
parameters = {}
if history_db_path is not None:
parameters[firefox_history.Parameters.HISTORY_DB_PATH] = history_db_path
return firefox_history.apply(parameters=parameters)
def extract_log_github(owner: str = "pm4py", repo: str = "pm4py-core", auth_token: Optional[str] = None) -> pd.DataFrame:
"""
Extracts a dataframe containing the history of the issues of a Github repository.
According to the API limit rate of public/registered users, only a part of the events
can be returned.
:param owner: owner of the repository (e.g., pm4py)
:param repo: name of the repository (e.g., pm4py-core)
:param auth_token: authorization token
:rtype: ``pd.DataFrame``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_log_github(owner='pm4py', repo='pm4py-core')
"""
from pm4py.algo.connectors.variants import github_repo
parameters = {}
parameters[github_repo.Parameters.OWNER] = owner
parameters[github_repo.Parameters.REPOSITORY] = repo
parameters[github_repo.Parameters.AUTH_TOKEN] = auth_token
return github_repo.apply(parameters)
def extract_log_camunda_workflow(connection_string: str) -> pd.DataFrame:
"""
Extracts a dataframe from the Camunda workflow system. Aside from the traditional columns,
the processID of the process in Camunda is returned.
:param connection_string: ODBC connection string to the Camunda database
:rtype: ``pd.DataFrame``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_log_camunda_workflow('Driver={PostgreSQL Unicode(x64)};SERVER=127.0.0.3;DATABASE=process-engine;UID=xx;PWD=yy')
"""
from pm4py.algo.connectors.variants import camunda_workflow
parameters = {}
parameters[camunda_workflow.Parameters.CONNECTION_STRING] = connection_string
return camunda_workflow.apply(None, parameters=parameters)
def extract_log_sap_o2c(connection_string: str, prefix: str = "") -> pd.DataFrame:
"""
Extracts a dataframe for the SAP O2C process.
:param connection_string: ODBC connection string to the SAP database
:param prefix: prefix for the tables (example: SAPSR3.)
:rtype: ``pd.DataFrame``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_log_sap_o2c('Driver={Oracle in instantclient_21_6};DBQ=127.0.0.3:1521/ZIB;UID=xx;PWD=yy')
"""
from pm4py.algo.connectors.variants import sap_o2c
parameters = {}
parameters[sap_o2c.Parameters.CONNECTION_STRING] = connection_string
parameters[sap_o2c.Parameters.PREFIX] = prefix
return sap_o2c.apply(None, parameters=parameters)
def extract_log_sap_accounting(connection_string: str, prefix: str = "") -> pd.DataFrame:
"""
Extracts a dataframe for the SAP Accounting process.
:param connection_string: ODBC connection string to the SAP database
:param prefix: prefix for the tables (example: SAPSR3.)
:rtype: ``pd.DataFrame``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_log_sap_accounting('Driver={Oracle in instantclient_21_6};DBQ=127.0.0.3:1521/ZIB;UID=xx;PWD=yy')
"""
from pm4py.algo.connectors.variants import sap_accounting
parameters = {}
parameters[sap_accounting.Parameters.CONNECTION_STRING] = connection_string
parameters[sap_accounting.Parameters.PREFIX] = prefix
return sap_accounting.apply(None, parameters=parameters)
def extract_ocel_outlook_mails() -> OCEL:
"""
Extracts the history of the conversations from the local instance of Microsoft Outlook
running on the current computer as an object-centric event log.
ACTIVITY (ocel:activity) => activity that is performed in the current item (send e-mail, receive e-mail,
refuse meeting ...)
TIMESTAMP (ocel:timestamp) => timestamp of creation of the item in Outlook
Object types:
- org:resource => the snder of the mail
- recipients => the list of recipients of the mail
- topic => the topic of the discussion
See also:
* https://learn.microsoft.com/en-us/dotnet/api/microsoft.office.interop.outlook.mailitem?redirectedfrom=MSDN&view=outlook-pia#properties_
* https://learn.microsoft.com/en-us/dotnet/api/microsoft.office.interop.outlook.olobjectclass?view=outlook-pia
:rtype: ``OCEL``
.. code-block:: python3
import pm4py
ocel = pm4py.connectors.extract_ocel_outlook_mails()
"""
import pm4py
dataframe = pm4py.connectors.extract_log_outlook_mails()
return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", ["org:resource", "recipients", "topic"])
def extract_ocel_outlook_calendar(email_user: Optional[str] = None, calendar_id: int = 9) -> OCEL:
"""
Extracts the history of the calendar events (creation, update, start, end)
as an object-centric event log from the local Outlook instance running on the current computer.
ACTIVITY (ocel:activity) => one between: Meeting Created, Last Change of Meeting, Meeting Started, Meeting Completed
TIMESTAMP (ocel:timestamp) => the timestamp of the event
Object types:
- case:concept:name => identifier of the meeting
- case:subject => the subject of the meeting
:param email_user: (optional) e-mail address from which the (shared) calendar should be extracted
:param calendar_id: identifier of the calendar for the given user (default: 9)
:rtype: ``OCEL``
.. code-block:: python3
import pm4py
ocel = pm4py.connectors.extract_ocel_outlook_calendar()
ocel = pm4py.connectors.extract_ocel_outlook_calendar("[email protected]")
"""
import pm4py
dataframe = pm4py.connectors.extract_log_outlook_calendar(email_user, calendar_id)
return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", ["case:concept:name", "case:subject"])
def extract_ocel_windows_events() -> OCEL:
"""
Extract a process mining dataframe from all the events recorded in the Windows registry as an object-centric
event log.
ACTIVITY (concept:name) => concatenation of the source name of the event and the event identifier
(see https://learn.microsoft.com/en-us/previous-versions/windows/desktop/eventlogprov/win32-ntlogevent)
TIMESTAMP (time:timestamp) => timestamp of generation of the event
Object types:
- categoryString: translation of the subcategory. The translation is source-specific.
- computerName: name of the computer that generated this event.
- eventIdentifier: identifier of the event. This is specific to the source that generated the event log entry.
- eventType: 1=Error; 2=Warning; 3=Information; 4=Security Audit Success;5=Security Audit Failure;
- sourceName: name of the source (application, service, driver, or subsystem) that generated the entry.
- user: user name of the logged-on user when the event occurred. If the user name cannot be determined, this will be NULL.
:rtype: ``OCEL``
.. code-block:: python3
import pm4py
ocel = pm4py.connectors.extract_ocel_windows_events()
"""
import pm4py
dataframe = pm4py.connectors.extract_log_windows_events()
return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", ["categoryString", "computerName", "eventIdentifier", "eventType", "sourceName", "user"])
def extract_ocel_chrome_history(history_db_path: Optional[str] = None) -> OCEL:
"""
Extracts an object-centric event log containing the navigation history of Google Chrome.
Please keep Google Chrome history closed when extracting.
ACTIVITY (ocel:activity) => the complete path of the website, minus the GET arguments
TIMESTAMP (ocel:timestamp) => the timestamp of visit
Object Types:
- case:concept:name : the profile of Chrome that is used to visit the site
- complete_url: the complete URL of the website
- url_wo_parameters: complete URL minus the part after ?
- domain: the domain of the website that is visited
:param history_db_path: path to the history DB path of Google Chrome (default: position of the Windows folder)
:rtype: ``OCEL``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_ocel_chrome_history()
"""
import pm4py
dataframe = pm4py.connectors.extract_log_chrome_history(history_db_path)
return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", ["case:concept:name", "complete_url", "url_wo_parameters", "domain"])
def extract_ocel_firefox_history(history_db_path: Optional[str] = None) -> OCEL:
"""
Extracts an object-centric event log containing the navigation history of Mozilla Firefox.
Please keep Mozilla Firefox history closed when extracting.
ACTIVITY (ocel:activity) => the complete path of the website, minus the GET arguments
TIMESTAMP (ocel:timestamp) => the timestamp of visit
Object Types:
- case:concept:name : the profile of Firefox that is used to visit the site
- complete_url: the complete URL of the website
- url_wo_parameters: complete URL minus the part after ?
- domain: the domain of the website that is visited
:param history_db_path: path to the history DB path of Mozilla Firefox (default: position of the Windows folder)
:rtype: ``OCEL``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_ocel_firefox_history()
"""
import pm4py
dataframe = pm4py.connectors.extract_log_firefox_history(history_db_path)
return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", ["case:concept:name", "complete_url", "url_wo_parameters", "domain"])
def extract_ocel_github(owner: str = "pm4py", repo: str = "pm4py-core", auth_token: Optional[str] = None) -> OCEL:
"""
Extracts a dataframe containing the history of the issues of a Github repository.
According to the API limit rate of public/registered users, only a part of the events
can be returned.
ACTIVITY (ocel:activity) => the event (created, commented, closed, subscribed ...)
TIMESTAMP (ocel:timestamp) => the timestamp of execution of the event
Object types:
- case:concept:name => the URL of the events related to the issue
- org:resource => the involved resource
- case:repo => the repository in which the issue is created
:param owner: owner of the repository (e.g., pm4py)
:param repo: name of the repository (e.g., pm4py-core)
:param auth_token: authorization token
:rtype: ``OCEL``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_ocel_github(owner='pm4py', repo='pm4py-core')
"""
import pm4py
dataframe = pm4py.connectors.extract_log_github(owner, repo, auth_token)
return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", ["case:concept:name", "org:resource", "case:repo"])
def extract_ocel_camunda_workflow(connection_string: str) -> OCEL:
"""
Extracts an object-centric event log from the Camunda workflow system.
:param connection_string: ODBC connection string to the Camunda database
:rtype: ``pd.DataFrame``
.. code-block:: python3
import pm4py
ocel = pm4py.connectors.extract_ocel_camunda_workflow('Driver={PostgreSQL Unicode(x64)};SERVER=127.0.0.3;DATABASE=process-engine;UID=xx;PWD=yy')
"""
import pm4py
dataframe = pm4py.connectors.extract_log_camunda_workflow(connection_string)
return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", ["case:concept:name", "processID", "org:resource"])
def extract_ocel_sap_o2c(connection_string: str, prefix: str = '') -> OCEL:
"""
Extracts an object-centric event log for the SAP O2C process.
:param connection_string: ODBC connection string to the SAP database
:param prefix: prefix for the tables (example: SAPSR3.)
:rtype: ``pd.DataFrame``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_ocel_sap_o2c('Driver={Oracle in instantclient_21_6};DBQ=127.0.0.3:1521/ZIB;UID=xx;PWD=yy')
"""
import pm4py
dataframe = pm4py.connectors.extract_log_sap_o2c(connection_string, prefix=prefix)
return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", ["case:concept:name", "org:resource"])
def extract_ocel_sap_accounting(connection_string: str, prefix: str = '') -> OCEL:
"""
Extracts an object-centric event log for the SAP Accounting process.
:param connection_string: ODBC connection string to the SAP database
:param prefix: prefix for the tables (example: SAPSR3.)
:rtype: ``pd.DataFrame``
.. code-block:: python3
import pm4py
dataframe = pm4py.connectors.extract_ocel_sap_accounting('Driver={Oracle in instantclient_21_6};DBQ=127.0.0.3:1521/ZIB;UID=xx;PWD=yy')
"""
import pm4py
dataframe = pm4py.connectors.extract_log_sap_accounting(connection_string, prefix=prefix)
return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", ["case:concept:name", "org:resource"])