File size: 4,158 Bytes
e60e568
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
'''
    This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

    PM4Py is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PM4Py is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PM4Py.  If not, see <https://www.gnu.org/licenses/>.
'''

from typing import Optional, Dict, Any
import pandas as pd
from datetime import datetime
from pm4py.util import pandas_utils
import importlib.util


def _wmi_property(wmi_object: Any, name: str) -> str:
    """Return ``str()`` of the property ``name`` looked up on a WMI object."""
    return str(wmi_object.Properties_(name))


def _parse_wmi_timestamp(raw: str) -> datetime:
    """
    Parse a WMI timestamp string into a naive ``datetime``.

    Everything from the first "+" or "-" onwards (the UTC offset suffix of
    the WMI datetime format - see the CIM_DATETIME reference) is discarded,
    so the returned value carries no timezone information.

    Raises
    ----------------
    ValueError
        If the remaining text does not match ``%Y%m%d%H%M%S.%f``.
    """
    local_part = raw.split("+")[0].split("-")[0]
    return datetime.strptime(local_part, "%Y%m%d%H%M%S.%f")


def _event_to_record(wmi_event: Any) -> Dict[str, Any]:
    """Convert one Win32_NTLogEvent object into a flat dictionary (one dataframe row)."""
    return {
        "category": _wmi_property(wmi_event, "Category"),
        "categoryString": _wmi_property(wmi_event, "CategoryString"),
        "computerName": _wmi_property(wmi_event, "ComputerName"),
        "eventCode": _wmi_property(wmi_event, "EventCode"),
        "eventIdentifier": _wmi_property(wmi_event, "EventIdentifier"),
        "eventType": _wmi_property(wmi_event, "EventType"),
        "logFile": _wmi_property(wmi_event, "LogFile"),
        "message": _wmi_property(wmi_event, "Message"),
        "recordNumber": _wmi_property(wmi_event, "RecordNumber"),
        "sourceName": _wmi_property(wmi_event, "SourceName"),
        "timeGenerated": _parse_wmi_timestamp(_wmi_property(wmi_event, "TimeGenerated")),
        "timeWritten": _parse_wmi_timestamp(_wmi_property(wmi_event, "TimeWritten")),
        "type": _wmi_property(wmi_event, "Type"),
        "user": _wmi_property(wmi_event, "User"),
    }


def apply(parameters: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
    """
    Extract a process mining dataframe from all the events recorded in the Windows event log
    (WMI class Win32_NTLogEvent, queried on the local computer).

    CASE ID (case:concept:name) => name of the computer emitting the events.
    ACTIVITY (concept:name)  => concatenation of the source name of the event and the event identifier
                (see https://learn.microsoft.com/en-us/previous-versions/windows/desktop/eventlogprov/win32-ntlogevent)
    TIMESTAMP (time:timestamp) => timestamp of generation of the event
    RESOURCE (org:resource) => username involved in the event

    Besides the columns above, the dataframe keeps one column per extracted WMI property
    (category, categoryString, computerName, eventCode, eventIdentifier, eventType, logFile,
    message, recordNumber, sourceName, timeGenerated, timeWritten, type, user) plus the
    "@@index" and "@@case_index" helper columns.

    Parameters
    ----------------
    parameters
        Optional dictionary of parameters; accepted for interface uniformity, no key is
        currently read by this function.

    Returns
    ----------------
    dataframe
        Pandas dataframe, sorted by case index, then timestamp, then original row index

    Raises
    ----------------
    ImportError
        If the win32com package (pywin32) is not available
    ValueError
        If a TimeGenerated/TimeWritten value cannot be parsed as a timestamp
    """
    if parameters is None:
        parameters = {}

    # imported lazily, so that importing this module does not itself require win32com
    import win32com.client

    print(":: executing SQL query against the Windows registry. this can take time.")

    computer = "."
    locator = win32com.client.Dispatch("WbemScripting.SWbemLocator")
    # raw string: "\c" is not a valid escape sequence and triggers a SyntaxWarning on recent Python versions
    services = locator.ConnectServer(computer, r"root\cimv2")
    wmi_events = services.ExecQuery("Select * from Win32_NTLogEvent")
    events = []

    # the progress bar is optional: it is shown only if tqdm is installed
    progress = None
    if importlib.util.find_spec("tqdm"):
        from tqdm.auto import tqdm
        progress = tqdm(total=len(wmi_events),
                        desc="extracting Windows events, progress :: ")

    try:
        for wmi_event in wmi_events:
            events.append(_event_to_record(wmi_event))
            if progress is not None:
                progress.update()
    finally:
        # close the bar also when an event fails to convert, so the terminal is left clean
        if progress is not None:
            progress.close()

    dataframe = pandas_utils.instantiate_dataframe(events)
    dataframe["case:concept:name"] = dataframe["computerName"]
    dataframe["time:timestamp"] = dataframe["timeGenerated"]
    dataframe["concept:name"] = dataframe["sourceName"] + " " + dataframe["eventIdentifier"]
    dataframe["org:resource"] = dataframe["user"]
    # "@@index" is used below as tie-breaker between events sharing the same timestamp
    dataframe = pandas_utils.insert_index(dataframe, "@@index", copy_dataframe=False, reset_index=False)
    dataframe = dataframe.sort_values(["time:timestamp", "@@index"])
    # with sort=False, cases are numbered in order of first appearance in the time-sorted dataframe
    dataframe["@@case_index"] = dataframe.groupby("case:concept:name", sort=False).ngroup()
    dataframe = dataframe.sort_values(["@@case_index", "time:timestamp", "@@index"])

    return dataframe