File size: 19,187 Bytes
8097001
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
'''
    This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

    PM4Py is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PM4Py is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PM4Py.  If not, see <https://www.gnu.org/licenses/>.
'''

from typing import Optional
import pandas as pd
from pm4py.objects.ocel.obj import OCEL


def extract_log_outlook_mails() -> pd.DataFrame:
    """
    Builds a dataframe with the history of the conversations stored in the local
    instance of Microsoft Outlook running on the current computer.

    Meaning of the main columns:

    * CASE ID (case:concept:name) => identifier of the conversation
    * ACTIVITY (concept:name) => activity performed in the current item
      (send e-mail, receive e-mail, refuse meeting ...)
    * TIMESTAMP (time:timestamp) => timestamp of creation of the item in Outlook
    * RESOURCE (org:resource) => sender of the current item

    See also:
    * https://learn.microsoft.com/en-us/dotnet/api/microsoft.office.interop.outlook.mailitem?redirectedfrom=MSDN&view=outlook-pia#properties_
    * https://learn.microsoft.com/en-us/dotnet/api/microsoft.office.interop.outlook.olobjectclass?view=outlook-pia

    :rtype: ``pd.DataFrame``

    .. code-block:: python3

        import pm4py

        dataframe = pm4py.connectors.extract_log_outlook_mails()
    """
    # Imported lazily, inside the function, so that importing this module
    # does not load the connector variant.
    from pm4py.algo.connectors.variants import outlook_mail_extractor as mail_variant

    mails_dataframe = mail_variant.apply()
    return mails_dataframe


def extract_log_outlook_calendar(email_user: Optional[str] = None, calendar_id: int = 9) -> pd.DataFrame:
    """
    Extracts the history of the calendar events (creation, update, start, end)
    in a Pandas dataframe from the local Outlook instance running on the current computer.

    CASE ID (case:concept:name) => identifier of the meeting
    ACTIVITY (concept:name) => one between: Meeting Created, Last Change of Meeting, Meeting Started, Meeting Completed
    TIMESTAMP (time:timestamp) => the timestamp of the event
    case:subject => the subject of the meeting

    :param email_user: (optional) e-mail address from which the (shared) calendar should be extracted
    :param calendar_id: identifier of the calendar for the given user (default: 9)

    :rtype: ``pd.DataFrame``

    .. code-block:: python3

        import pm4py

        # calendar of the current user
        dataframe = pm4py.connectors.extract_log_outlook_calendar()
        # (shared) calendar of another user; the address is a placeholder
        dataframe = pm4py.connectors.extract_log_outlook_calendar("user@example.com")
    """
    from pm4py.algo.connectors.variants import outlook_calendar

    # Both entries are always forwarded, even when email_user is None.
    parameters = {
        outlook_calendar.Parameters.EMAIL_USER: email_user,
        outlook_calendar.Parameters.CALENDAR_ID: calendar_id,
    }
    return outlook_calendar.apply(parameters=parameters)


def extract_log_windows_events() -> pd.DataFrame:
    """
    Builds a process mining dataframe out of all the events recorded in the Windows registry.

    Meaning of the main columns:

    * CASE ID (case:concept:name) => name of the computer emitting the events.
    * ACTIVITY (concept:name) => concatenation of the source name of the event and the event identifier
      (see https://learn.microsoft.com/en-us/previous-versions/windows/desktop/eventlogprov/win32-ntlogevent)
    * TIMESTAMP (time:timestamp) => timestamp of generation of the event
    * RESOURCE (org:resource) => username involved in the event

    :rtype: ``pd.DataFrame``

    .. code-block:: python3

        import pm4py

        dataframe = pm4py.connectors.extract_log_windows_events()
    """
    # Imported lazily, inside the function, so that importing this module
    # does not load the connector variant.
    from pm4py.algo.connectors.variants import windows_events as events_variant

    events_dataframe = events_variant.apply()
    return events_dataframe


def extract_log_chrome_history(history_db_path: Optional[str] = None) -> pd.DataFrame:
    """
    Builds a dataframe containing the navigation history of Google Chrome.
    Please keep Google Chrome history closed when extracting.

    Meaning of the main columns:

    * CASE ID (case:concept:name) => an identifier of the profile that has been extracted
    * ACTIVITY (concept:name) => the complete path of the website, minus the GET arguments
    * TIMESTAMP (time:timestamp) => the timestamp of visit

    :param history_db_path: path to the history DB path of Google Chrome (default: position of the Windows folder)
    :rtype: ``pd.DataFrame``

    .. code-block:: python3

        import pm4py

        dataframe = pm4py.connectors.extract_log_chrome_history()
    """
    from pm4py.algo.connectors.variants import chrome_history as chrome_variant

    # The path is forwarded only when given, so the variant applies its own default otherwise.
    if history_db_path is None:
        variant_parameters = {}
    else:
        variant_parameters = {chrome_variant.Parameters.HISTORY_DB_PATH: history_db_path}
    return chrome_variant.apply(parameters=variant_parameters)


def extract_log_firefox_history(history_db_path: Optional[str] = None) -> pd.DataFrame:
    """
    Extracts a dataframe containing the navigation history of Mozilla Firefox.
    Please keep Mozilla Firefox history closed when extracting.

    CASE ID (case:concept:name) => an identifier of the profile that has been extracted
    ACTIVITY (concept:name) => the complete path of the website, minus the GET arguments
    TIMESTAMP (time:timestamp) => the timestamp of visit

    :param history_db_path: path to the history DB path of Mozilla Firefox (default: position of the Windows folder)
    :rtype: ``pd.DataFrame``

    .. code-block:: python3

        import pm4py

        dataframe = pm4py.connectors.extract_log_firefox_history()
    """
    from pm4py.algo.connectors.variants import firefox_history

    # The path is forwarded only when given, so the variant applies its own default otherwise.
    parameters = {}
    if history_db_path is not None:
        parameters[firefox_history.Parameters.HISTORY_DB_PATH] = history_db_path
    return firefox_history.apply(parameters=parameters)


def extract_log_github(owner: str = "pm4py", repo: str = "pm4py-core", auth_token: Optional[str] = None) -> pd.DataFrame:
    """
    Builds a dataframe containing the history of the issues of a Github repository.
    According to the API limit rate of public/registered users, only a part of the events
    can be returned.

    :param owner: owner of the repository (e.g., pm4py)
    :param repo: name of the repository (e.g., pm4py-core)
    :param auth_token: authorization token
    :rtype: ``pd.DataFrame``

    .. code-block:: python3

        import pm4py

        dataframe = pm4py.connectors.extract_log_github(owner='pm4py', repo='pm4py-core')
    """
    from pm4py.algo.connectors.variants import github_repo as github_variant

    # All three entries are always forwarded, even when auth_token is None.
    variant_parameters = {
        github_variant.Parameters.OWNER: owner,
        github_variant.Parameters.REPOSITORY: repo,
        github_variant.Parameters.AUTH_TOKEN: auth_token,
    }
    return github_variant.apply(variant_parameters)


def extract_log_camunda_workflow(connection_string: str) -> pd.DataFrame:
    """
    Builds a dataframe from the Camunda workflow system. Aside from the traditional columns,
    the processID of the process in Camunda is returned.

    :param connection_string: ODBC connection string to the Camunda database
    :rtype: ``pd.DataFrame``

    .. code-block:: python3

        import pm4py

        dataframe = pm4py.connectors.extract_log_camunda_workflow('Driver={PostgreSQL Unicode(x64)};SERVER=127.0.0.3;DATABASE=process-engine;UID=xx;PWD=yy')
    """
    from pm4py.algo.connectors.variants import camunda_workflow as camunda_variant

    variant_parameters = {camunda_variant.Parameters.CONNECTION_STRING: connection_string}
    # The first positional argument of the variant is passed as None;
    # the connection string travels in the parameters.
    return camunda_variant.apply(None, parameters=variant_parameters)


def extract_log_sap_o2c(connection_string: str, prefix: str = "") -> pd.DataFrame:
    """
    Builds a dataframe for the SAP O2C process.

    :param connection_string: ODBC connection string to the SAP database
    :param prefix: prefix for the tables (example: SAPSR3.)
    :rtype: ``pd.DataFrame``

    .. code-block:: python3

        import pm4py

        dataframe = pm4py.connectors.extract_log_sap_o2c('Driver={Oracle in instantclient_21_6};DBQ=127.0.0.3:1521/ZIB;UID=xx;PWD=yy')
    """
    from pm4py.algo.connectors.variants import sap_o2c as o2c_variant

    variant_parameters = {
        o2c_variant.Parameters.CONNECTION_STRING: connection_string,
        o2c_variant.Parameters.PREFIX: prefix,
    }
    # The first positional argument of the variant is passed as None;
    # the connection details travel in the parameters.
    return o2c_variant.apply(None, parameters=variant_parameters)


def extract_log_sap_accounting(connection_string: str, prefix: str = "") -> pd.DataFrame:
    """
    Builds a dataframe for the SAP Accounting process.

    :param connection_string: ODBC connection string to the SAP database
    :param prefix: prefix for the tables (example: SAPSR3.)
    :rtype: ``pd.DataFrame``

    .. code-block:: python3

        import pm4py

        dataframe = pm4py.connectors.extract_log_sap_accounting('Driver={Oracle in instantclient_21_6};DBQ=127.0.0.3:1521/ZIB;UID=xx;PWD=yy')
    """
    from pm4py.algo.connectors.variants import sap_accounting as accounting_variant

    variant_parameters = {
        accounting_variant.Parameters.CONNECTION_STRING: connection_string,
        accounting_variant.Parameters.PREFIX: prefix,
    }
    # The first positional argument of the variant is passed as None;
    # the connection details travel in the parameters.
    return accounting_variant.apply(None, parameters=variant_parameters)


def extract_ocel_outlook_mails() -> OCEL:
    """
    Extracts the history of the conversations from the local instance of Microsoft Outlook
    running on the current computer as an object-centric event log.

    ACTIVITY (ocel:activity) => activity that is performed in the current item (send e-mail, receive e-mail,
                                                                                refuse meeting ...)
    TIMESTAMP (ocel:timestamp) => timestamp of creation of the item in Outlook

    Object types:
    - org:resource => the sender of the mail
    - recipients => the list of recipients of the mail
    - topic => the topic of the discussion

    See also:
    * https://learn.microsoft.com/en-us/dotnet/api/microsoft.office.interop.outlook.mailitem?redirectedfrom=MSDN&view=outlook-pia#properties_
    * https://learn.microsoft.com/en-us/dotnet/api/microsoft.office.interop.outlook.olobjectclass?view=outlook-pia

    :rtype: ``OCEL``

    .. code-block:: python3

        import pm4py

        ocel = pm4py.connectors.extract_ocel_outlook_mails()
    """
    import pm4py

    # The traditional dataframe is extracted first; the listed columns
    # are then used as object types in the conversion.
    dataframe = pm4py.connectors.extract_log_outlook_mails()
    object_types = ["org:resource", "recipients", "topic"]
    return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", object_types)


def extract_ocel_outlook_calendar(email_user: Optional[str] = None, calendar_id: int = 9) -> OCEL:
    """
    Extracts the history of the calendar events (creation, update, start, end)
    as an object-centric event log from the local Outlook instance running on the current computer.

    ACTIVITY (ocel:activity) => one between: Meeting Created, Last Change of Meeting, Meeting Started, Meeting Completed
    TIMESTAMP (ocel:timestamp) => the timestamp of the event

    Object types:
    - case:concept:name => identifier of the meeting
    - case:subject => the subject of the meeting

    :param email_user: (optional) e-mail address from which the (shared) calendar should be extracted
    :param calendar_id: identifier of the calendar for the given user (default: 9)

    :rtype: ``OCEL``

    .. code-block:: python3

        import pm4py

        # calendar of the current user
        ocel = pm4py.connectors.extract_ocel_outlook_calendar()
        # (shared) calendar of another user; the address is a placeholder
        ocel = pm4py.connectors.extract_ocel_outlook_calendar("user@example.com")
    """
    import pm4py

    # The traditional dataframe is extracted first; the listed columns
    # are then used as object types in the conversion.
    dataframe = pm4py.connectors.extract_log_outlook_calendar(email_user, calendar_id)
    object_types = ["case:concept:name", "case:subject"]
    return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", object_types)


def extract_ocel_windows_events() -> OCEL:
    """
    Extracts an object-centric event log from all the events recorded in the Windows registry.

    ACTIVITY (ocel:activity) => concatenation of the source name of the event and the event identifier
                (see https://learn.microsoft.com/en-us/previous-versions/windows/desktop/eventlogprov/win32-ntlogevent)
    TIMESTAMP (ocel:timestamp) => timestamp of generation of the event

    Object types:
    - categoryString: translation of the subcategory. The translation is source-specific.
    - computerName: name of the computer that generated this event.
    - eventIdentifier: identifier of the event. This is specific to the source that generated the event log entry.
    - eventType: 1=Error; 2=Warning; 3=Information; 4=Security Audit Success; 5=Security Audit Failure;
    - sourceName: name of the source (application, service, driver, or subsystem) that generated the entry.
    - user: user name of the logged-on user when the event occurred. If the user name cannot be determined, this will be NULL.

    :rtype: ``OCEL``

    .. code-block:: python3

        import pm4py

        ocel = pm4py.connectors.extract_ocel_windows_events()
    """
    import pm4py

    # The traditional dataframe is extracted first (activity in "concept:name",
    # timestamp in "time:timestamp"); the listed columns become the object types.
    dataframe = pm4py.connectors.extract_log_windows_events()
    object_types = ["categoryString", "computerName", "eventIdentifier", "eventType", "sourceName", "user"]
    return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", object_types)


def extract_ocel_chrome_history(history_db_path: Optional[str] = None) -> OCEL:
    """
    Builds an object-centric event log containing the navigation history of Google Chrome.
    Please keep Google Chrome history closed when extracting.

    * ACTIVITY (ocel:activity) => the complete path of the website, minus the GET arguments
    * TIMESTAMP (ocel:timestamp) => the timestamp of visit

    Object Types:
    - case:concept:name : the profile of Chrome that is used to visit the site
    - complete_url: the complete URL of the website
    - url_wo_parameters: complete URL minus the part after ?
    - domain: the domain of the website that is visited

    :param history_db_path: path to the history DB path of Google Chrome (default: position of the Windows folder)
    :rtype: ``OCEL``

    .. code-block:: python3

        import pm4py

        ocel = pm4py.connectors.extract_ocel_chrome_history()
    """
    import pm4py

    # The traditional dataframe is extracted first; the listed columns
    # are then used as object types in the conversion.
    history_dataframe = pm4py.connectors.extract_log_chrome_history(history_db_path)
    object_types = ["case:concept:name", "complete_url", "url_wo_parameters", "domain"]
    return pm4py.convert_log_to_ocel(history_dataframe, "concept:name", "time:timestamp", object_types)


def extract_ocel_firefox_history(history_db_path: Optional[str] = None) -> OCEL:
    """
    Builds an object-centric event log containing the navigation history of Mozilla Firefox.
    Please keep Mozilla Firefox history closed when extracting.

    * ACTIVITY (ocel:activity) => the complete path of the website, minus the GET arguments
    * TIMESTAMP (ocel:timestamp) => the timestamp of visit

    Object Types:
    - case:concept:name : the profile of Firefox that is used to visit the site
    - complete_url: the complete URL of the website
    - url_wo_parameters: complete URL minus the part after ?
    - domain: the domain of the website that is visited

    :param history_db_path: path to the history DB path of Mozilla Firefox (default: position of the Windows folder)
    :rtype: ``OCEL``

    .. code-block:: python3

        import pm4py

        ocel = pm4py.connectors.extract_ocel_firefox_history()
    """
    import pm4py

    # The traditional dataframe is extracted first; the listed columns
    # are then used as object types in the conversion.
    history_dataframe = pm4py.connectors.extract_log_firefox_history(history_db_path)
    object_types = ["case:concept:name", "complete_url", "url_wo_parameters", "domain"]
    return pm4py.convert_log_to_ocel(history_dataframe, "concept:name", "time:timestamp", object_types)


def extract_ocel_github(owner: str = "pm4py", repo: str = "pm4py-core", auth_token: Optional[str] = None) -> OCEL:
    """
    Extracts an object-centric event log containing the history of the issues of a Github repository.
    According to the API limit rate of public/registered users, only a part of the events
    can be returned.

    ACTIVITY (ocel:activity) => the event (created, commented, closed, subscribed ...)
    TIMESTAMP (ocel:timestamp) => the timestamp of execution of the event

    Object types:
    - case:concept:name => the URL of the events related to the issue
    - org:resource => the involved resource
    - case:repo => the repository in which the issue is created

    :param owner: owner of the repository (e.g., pm4py)
    :param repo: name of the repository (e.g., pm4py-core)
    :param auth_token: authorization token
    :rtype: ``OCEL``

    .. code-block:: python3

        import pm4py

        ocel = pm4py.connectors.extract_ocel_github(owner='pm4py', repo='pm4py-core')
    """
    import pm4py

    # The traditional dataframe is extracted first; the listed columns
    # are then used as object types in the conversion.
    dataframe = pm4py.connectors.extract_log_github(owner, repo, auth_token)
    object_types = ["case:concept:name", "org:resource", "case:repo"]
    return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", object_types)


def extract_ocel_camunda_workflow(connection_string: str) -> OCEL:
    """
    Extracts an object-centric event log from the Camunda workflow system.

    The columns of the traditional dataframe used as object types are:
    - case:concept:name
    - processID
    - org:resource

    :param connection_string: ODBC connection string to the Camunda database
    :rtype: ``OCEL``

    .. code-block:: python3

        import pm4py

        ocel = pm4py.connectors.extract_ocel_camunda_workflow('Driver={PostgreSQL Unicode(x64)};SERVER=127.0.0.3;DATABASE=process-engine;UID=xx;PWD=yy')
    """
    import pm4py

    # The traditional dataframe is extracted first; the listed columns
    # are then used as object types in the conversion.
    dataframe = pm4py.connectors.extract_log_camunda_workflow(connection_string)
    object_types = ["case:concept:name", "processID", "org:resource"]
    return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", object_types)


def extract_ocel_sap_o2c(connection_string: str, prefix: str = '') -> OCEL:
    """
    Extracts an object-centric event log for the SAP O2C process.

    The columns of the traditional dataframe used as object types are:
    - case:concept:name
    - org:resource

    :param connection_string: ODBC connection string to the SAP database
    :param prefix: prefix for the tables (example: SAPSR3.)
    :rtype: ``OCEL``

    .. code-block:: python3

        import pm4py

        ocel = pm4py.connectors.extract_ocel_sap_o2c('Driver={Oracle in instantclient_21_6};DBQ=127.0.0.3:1521/ZIB;UID=xx;PWD=yy')
    """
    import pm4py

    # The traditional dataframe is extracted first; the listed columns
    # are then used as object types in the conversion.
    dataframe = pm4py.connectors.extract_log_sap_o2c(connection_string, prefix=prefix)
    object_types = ["case:concept:name", "org:resource"]
    return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", object_types)


def extract_ocel_sap_accounting(connection_string: str, prefix: str = '') -> OCEL:
    """
    Extracts an object-centric event log for the SAP Accounting process.

    The columns of the traditional dataframe used as object types are:
    - case:concept:name
    - org:resource

    :param connection_string: ODBC connection string to the SAP database
    :param prefix: prefix for the tables (example: SAPSR3.)
    :rtype: ``OCEL``

    .. code-block:: python3

        import pm4py

        ocel = pm4py.connectors.extract_ocel_sap_accounting('Driver={Oracle in instantclient_21_6};DBQ=127.0.0.3:1521/ZIB;UID=xx;PWD=yy')
    """
    import pm4py

    # The traditional dataframe is extracted first; the listed columns
    # are then used as object types in the conversion.
    dataframe = pm4py.connectors.extract_log_sap_accounting(connection_string, prefix=prefix)
    object_types = ["case:concept:name", "org:resource"]
    return pm4py.convert_log_to_ocel(dataframe, "concept:name", "time:timestamp", object_types)